How to evaluate an untrained model when running a CNN in PyTorch
I just started learning PyTorch today. After seeing some examples on Kaggle, I want to port my convolutional neural network from TensorFlow to Torch to make better use of the GPU.
I wanted the training and test sets to be better balanced, so I used train_test_split. Now I have two problems:
1. I cannot evaluate the model before training it. I want to look at val_loss and val_accuracy without training the model first, but that does not seem possible.
2. I only trained for 2 epochs, yet the reported validation accuracy is way out of range. Why is that?
Can anyone help me?
I defined CustomDataset in a separate Python file and import it in the main module.
import os
import cv2
import random
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision
from torchsummary import summary
class CustomDataset(Dataset):
    def __init__(self, root_folder_path):
        self.root_folder_path = root_folder_path
        self.image_files = []
        self.labels = []
        # Collect image paths and corresponding labels
        folders = sorted([f for f in os.listdir(root_folder_path) if os.path.isdir(os.path.join(root_folder_path, f))])
        self.label_dict = {folder: i for i, folder in enumerate(folders)}
        for folder in folders:
            folder_path = os.path.join(root_folder_path, folder)
            image_files = sorted([f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)) and f.endswith('.jpg')])
            self.image_files.extend([os.path.join(folder_path, img) for img in image_files])
            self.labels.extend([self.label_dict[folder]] * len(image_files))
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((900, 300)),
            transforms.Grayscale(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        label = self.labels[idx]
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        image = self.transform(image)
        # print("Image shape:", image.shape)  # Print the shape of the image
        one_hot_label = torch.zeros(len(self.label_dict))
        one_hot_label[label] = 1
        return image, one_hot_label
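(Editor's note: F.cross_entropy accepts integer class indices directly, and one-hot/probability targets only work on newer PyTorch releases, so a simpler variant of __getitem__ could skip the one-hot encoding entirely. A minimal sketch of that alternative, assuming the rest of the class stays unchanged:)

def __getitem__(self, idx):
    image = cv2.imread(self.image_files[idx], cv2.IMREAD_GRAYSCALE)
    image = self.transform(image)
    # Return the integer class index; F.cross_entropy and an
    # argmax-based accuracy both expect this format.
    return image, torch.tensor(self.labels[idx], dtype=torch.long)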
Here is my main script.
if __name__ == '__main__':
    # Instantiate your custom dataset and dataloaders
    root_folder_path = r'W:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\image_dataset_300_900_10_classes'
    dataset = CustomDataset(root_folder_path)
    print("Labels:", sorted(dataset.label_dict.keys()))
    print("Total number of labels:", len(dataset.label_dict))

    # Display some images from each folder
    n_images_to_display = 4
    n_folders = len(dataset.label_dict)
    fig, ax = plt.subplots(n_images_to_display, n_folders, figsize=(n_folders * 4, n_images_to_display * 4))
    for i, (folder, label) in enumerate(dataset.label_dict.items()):
        folder_images = [dataset[i][0] for i, lbl in enumerate(dataset.labels) if lbl == label]
        indices_to_display = random.sample(range(len(folder_images)), min(n_images_to_display, len(folder_images)))
        for j, ind in enumerate(indices_to_display):
            ax[j, i].imshow(folder_images[ind].squeeze(), cmap='gray')  # Squeeze to remove the channel dimension for grayscale images
            ax[j, i].axis('off')
        ax[0, i].set_title(folder, fontsize=30)
    plt.show()
    fig.tight_layout(pad=0, w_pad=0, h_pad=0)

    from torch.utils.data import DataLoader, Subset
    from sklearn.model_selection import train_test_split

    TEST_SIZE = 0.2
    BATCH_SIZE = 64
    SEED = 42

    # Get the labels from the dataset
    labels = np.array([label for _, label in dataset])

    # generate indices: instead of the actual data we pass in integers instead
    train_indices, test_indices, _, _ = train_test_split(
        range(len(dataset)),
        labels,
        stratify=labels,
        test_size=TEST_SIZE,
        random_state=SEED
    )

    # generate subsets based on the indices
    train_split = Subset(dataset, train_indices)
    test_split = Subset(dataset, test_indices)
    print('Length of train_batch:', len(train_split))
    print('Length of test_batch:', len(test_split))

    # create batches
    train_loader = DataLoader(train_split, batch_size=BATCH_SIZE, num_workers=6, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_split, batch_size=BATCH_SIZE, num_workers=6, pin_memory=True)
    class ImageClassificationBase(nn.Module):
        def training_step(self, batch):
            images, labels = batch
            out = self(images)                   # Generate predictions
            loss = F.cross_entropy(out, labels)  # Calculate loss
            return loss

        def accuracy(self, outputs, labels):
            # _, preds = torch.max(outputs, dim=1)
            preds = torch.argmax(outputs, dim=1)
            preds_one_hot = F.one_hot(preds, num_classes=labels.shape[1])  # Convert predictions to one-hot encoding
            print("Shape of preds:", preds_one_hot.shape)  # Check the shape of preds
            return torch.sum(preds_one_hot == labels).float().mean()

        def validation_step(self, batch):
            images, labels = batch
            out = self(images)                   # Generate predictions
            loss = F.cross_entropy(out, labels)  # Calculate loss
            acc = self.accuracy(out, labels)     # Calculate accuracy
            return {'val_loss': loss.detach(), 'val_acc': acc}

        def validation_epoch_end(self, outputs):
            batch_losses = [x['val_loss'] for x in outputs]
            epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
            batch_accs = [x['val_acc'] for x in outputs]
            epoch_acc = torch.stack(batch_accs).mean()     # Combine accuracies
            return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

        def epoch_end(self, epoch, result):
            print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
                epoch, result['train_loss'], result['val_loss'], result['val_acc']))
    import torch.nn.init as init

    class ImageClassification(ImageClassificationBase):
        def __init__(self):
            super().__init__()
            self.network = nn.Sequential(
                # image size is [1, 900, 300] as [channel, height, width]
                nn.Conv2d(1, 32, kernel_size=3, padding=1),
                nn.LeakyReLU(0.01),
                nn.BatchNorm2d(32),
                nn.AvgPool2d(kernel_size=2, stride=2),

                nn.Conv2d(32, 32, kernel_size=3, padding=1),
                nn.LeakyReLU(0.01),
                nn.BatchNorm2d(32),
                nn.AvgPool2d(kernel_size=2, stride=2),

                nn.Conv2d(32, 64, kernel_size=3, padding=1),
                nn.LeakyReLU(0.01),
                nn.BatchNorm2d(64),
                nn.AvgPool2d(kernel_size=2, stride=2),

                nn.Conv2d(64, 64, kernel_size=3, padding=1),
                nn.LeakyReLU(0.01),
                nn.BatchNorm2d(64),
                nn.AvgPool2d(kernel_size=2, stride=2),

                nn.Flatten(),
                nn.Dropout(0.3),
                nn.Linear(64 * 56 * 18, 64),  # Assuming input size after convolutional layers is 64 * 56 * 18
                nn.LeakyReLU(0.01),
                nn.BatchNorm1d(64),
                nn.Dropout(0.2),
                nn.Linear(64, 64),
                nn.LeakyReLU(0.01),
                nn.BatchNorm1d(64),
                nn.Dropout(0.2),
                nn.Linear(64, 10)  # Output layer
            )
            # Initialize the weights of convolutional layers
            self._initialize_weights()

        def _initialize_weights(self):
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

        def forward(self, xb):
            return self.network(xb)
    def get_default_device():
        """Set device to GPU or CPU"""
        if torch.cuda.is_available():
            return torch.device('cuda')
        else:
            return torch.device('cpu')

    def to_device(data, device):
        """Move data to the device"""
        if isinstance(data, (list, tuple)):
            return [to_device(x, device) for x in data]
        return data.to(device, non_blocking=True)

    class DeviceDataLoader():
        """Wrap a dataloader to move data to a device"""
        def __init__(self, dl, device):
            self.dl = dl
            self.device = device

        def __iter__(self):
            """Yield a batch of data after moving it to device"""
            for b in self.dl:
                yield to_device(b, self.device)

        def __len__(self):
            """Number of batches"""
            return len(self.dl)

    device = get_default_device()
    device
    torch.cuda.empty_cache()
    model = ImageClassification()

    random_seed = 99
    torch.manual_seed(random_seed)

    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)
    to_device(model, device)
    @torch.no_grad()
    def evaluate(model, val_loader):
        model.eval()
        outputs = [model.validation_step(batch) for batch in val_loader]
        return model.validation_epoch_end(outputs)

    def accuracy(outputs, labels):
        _, preds = torch.max(outputs, dim=1)
        return torch.tensor(torch.sum(preds == labels).item() / len(preds))

    def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.RMSprop):
        history = []
        optimizer = opt_func(model.parameters(), lr)
        for epoch in range(epochs):
            # Training phase
            model.train()
            train_losses = []
            for batch in train_loader:
                loss = model.training_step(batch)
                train_losses.append(loss)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            # Validation phase
            result = evaluate(model, val_loader)
            result['train_loss'] = torch.stack(train_losses).mean().item()
            model.epoch_end(epoch, result)
            history.append(result)
        return history

    model = to_device(ImageClassification(), device)

    # initial evaluation of the model
    evaluate(model, test_loader)

    # set the no. of epochs, optimizer function and learning rate
    num_epochs = 2
    opt_func = torch.optim.RMSprop
    lr = 0.0001

    # fit the model on the training data and record the result after each epoch
    history = fit(num_epochs, lr, model, train_loader, test_loader, opt_func)
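As an aside, the 64 * 56 * 18 input size of the first nn.Linear layer does not have to be assumed: each of the four AvgPool2d(kernel_size=2, stride=2) stages halves the spatial dimensions with floor division (900 → 450 → 225 → 112 → 56 and 300 → 150 → 75 → 37 → 18), so the flattened feature size is 64 * 56 * 18 = 64512. A minimal sketch to double-check this with a dummy forward pass (the throwaway model instance and the batch size of 2 are my own choices):

# Push a fake batch through the layers up to and including nn.Flatten
# to confirm the feature size the first nn.Linear layer must accept.
check_model = ImageClassification()
x = torch.zeros(2, 1, 900, 300)  # dummy batch: two 900x300 grayscale images
with torch.no_grad():
    for layer in check_model.network:
        x = layer(x)
        if isinstance(layer, nn.Flatten):
            break
print(x.shape)  # torch.Size([2, 64512]), and 64 * 56 * 18 == 64512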
This is what the data looks like. In my model the image size is [1, 900, 300] as [channel, height, width]: 1 is the grayscale channel, 900 the image height in pixels, and 300 the image width in pixels.
The output is:
Labels: ['120', '144', '168', '192', '216', '24', '240', '48', '72', '96']
Total number of labels: 10
Length of train_batch: 1835
Length of test_batch: 459
Train batch size: torch.Size([32, 1, 900, 300])
Shape of labels array: torch.Size([32, 10])
... (the same pair of lines repeats for all 57 full training batches of 32) ...
Train batch size: torch.Size([11, 1, 900, 300])
Shape of labels array: torch.Size([11, 10])
Test batch size: torch.Size([32, 1, 900, 300])
Shape of labels array: torch.Size([32, 10])
... (the same pair of lines repeats for all 14 full test batches of 32) ...
Test batch size: torch.Size([11, 1, 900, 300])
Shape of labels array: torch.Size([11, 10])
Shape of preds: torch.Size([32, 10])
... (repeated for the 14 full batches of the initial pre-training evaluation) ...
Shape of preds: torch.Size([11, 10])
Shape of preds: torch.Size([32, 10])
... (repeated for the 14 full validation batches of epoch 0) ...
Shape of preds: torch.Size([11, 10])
Epoch [0], train_loss: 1.5568, val_loss: 1.6037, val_acc: 267.2000
Shape of preds: torch.Size([32, 10])
... (repeated for the 14 full validation batches of epoch 1) ...
Shape of preds: torch.Size([11, 10])
Epoch [1], train_loss: 0.9959, val_loss: 1.6849, val_acc: 273.4667
Here is the Google Drive link to the dataset: https://drive.google.com/drive/folders/1PsT9_HWX4snfgnhlwC6xM4rNjcoqXdk5?usp=drive_link
Following @Ivan's suggestion I tried the fix, but the error persists.
The output of the code is:
PS C:\Users\smjobagc> & C:/Users/smjobagc/AppData/Local/miniconda3/envs/FSV/python.exe "w:/MASTER_BAGCHI_SCHALDACH/THESIS/code and dataset/10 class cropped 300_900 runs/10_class_torch.py"
Labels: ['120', '144', '168', '192', '216', '24', '240', '48', '72', '96']
Total number of labels: 10
Length of train_batch: 1835
Length of test_batch: 459
Traceback (most recent call last):
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 236, in <module>
    evaluate(model,test_loader)
  File "C:\Users\smjobagc\AppData\Local\miniconda3\envs\FSV\lib\site-packages\torch\autograd\grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 206, in evaluate
    outputs = [model.validation_step(batch) for batch in val_loader]
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 206, in <listcomp>
    outputs = [model.validation_step(batch) for batch in val_loader]
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 92, in validation_step
    acc = accuracy(out, labels) # Calculate accuracy
  File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 212, in accuracy
    return torch.sum(preds.argmax(1) == labels).float().mean()
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
1 Answer
Let's first take stock of the situation:

- You have a dataloader that outputs tensors of shape (64, 1, 900, 300), meaning each batch contains 64 elements, each of them a 900x300 grayscale image;
- the corresponding ground-truth labels have shape (64,);
- the model outputs class scores (logits) of shape (64, 10), i.e. ten scores q1, q2, ..., q10 for every element of the batch;
- you are trying to compare two tensors of different shapes: preds == labels, where preds is (64, 10) while labels is (64,).
Hence the error, which you can read as:

    The size of tensor labels (64) must match the size of tensor preds (10) at non-singleton dimension 1.
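The mismatch is easy to reproduce in isolation; a minimal sketch with made-up tensors of the same shapes:

import torch

preds = torch.randn(64, 10)           # fake logits: ten scores per image
labels = torch.randint(0, 10, (64,))  # fake integer ground-truth labels
# preds == labels  # fails: (64, 10) and (64,) cannot be broadcast together,
#                  # the sizes differ at non-singleton dimension 1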
You are trying to compare an image's estimated class scores with a single integer class. How could that ever work? You first need to take the arg max of the estimated scores (i.e. the index of the class with the highest score); only then can you compare the result against the ground-truth label and measure the accuracy:

(preds.argmax(1) == labels).float().mean()
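One caveat for the code in the question: CustomDataset returns one-hot labels of shape (batch, 10) rather than integer indices, so there both tensors need an argmax before the comparison. A minimal sketch of an accuracy helper that handles either label format (the helper name is mine):

def accuracy_from_logits(outputs, labels):
    # outputs: (batch, n_classes) logits
    # labels: integer indices of shape (batch,) or one-hot vectors of shape (batch, n_classes)
    preds = outputs.argmax(dim=1)
    if labels.dim() > 1:  # one-hot labels, as returned by CustomDataset
        labels = labels.argmax(dim=1)
    return (preds == labels).float().mean()

This keeps val_acc in [0, 1]. It also explains the out-of-range values in the question: torch.sum(preds_one_hot == labels) counts every position where both one-hot vectors contain a 0, so each batch of 32 contributes a sum between 256 and 320 instead of a fraction between 0 and 1.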