Computer vision neural network in PyTorch
Hi everyone, this is a fairly long question about why my convolutional neural network (CNN) won't work. For context, I'm working in Jupyter Notebooks and have already imported all the necessary libraries.
I've spent a long time writing and debugging this. Right now I have the following code and output:
# Every tensor we work with has the batch size as its first dimension
# Input size: b x 3 x 240 x 360
# After a convolution it's b x 16 x height x width
# (batch size, feature dimension, height, width)
class SceneClassificationCNN(nn.Module):
    def __init__(self, img_height=240, img_width=360):
        super(SceneClassificationCNN, self).__init__()
        nn.Flatten()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # Pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self._flat_features_size = self._get_conv_output_size(img_height, img_width)
        # Fully connected layers
        # (For the default 240x360 input, the feature map is 64 x 30 x 45 after the conv/pool stack; the size is computed dynamically above.)
        self.fc1 = nn.Linear(in_features=self._flat_features_size, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=5)  # Assuming 5 classes
        # STOP HARD CODING NOTE TO SELF
        # For self.fc1 find how to calculate the size of the flattened image in place of the 120 and 180

    def _get_conv_output_size(self, img_height, img_width):
        # Simulate a forward pass through the convolutional and pooling layers
        # without considering the actual data (using a dummy tensor)
        dummy_input = torch.zeros(1, 3, img_height, img_width)
        with torch.no_grad():
            dummy_output = self.pool(functional.relu(self.conv1(dummy_input)))
            dummy_output = self.pool(functional.relu(self.conv2(dummy_output)))
            dummy_output = self.pool(functional.relu(self.conv3(dummy_output)))
        print("Dummy output size:", dummy_output.size())
        return int(np.prod(dummy_output.size()[1:]))

    def forward(self, x):
        # Apply convolutions and pooling
        x = self.pool(functional.relu(self.conv1(x)))
        x = self.pool(functional.relu(self.conv2(x)))
        x = self.pool(functional.relu(self.conv3(x)))
        # Flatten the output for the fully connected layers
        #x = x.view(x.size(0), -1) #Change the shape
        # Apply fully connected layers with ReLU activations
        x = functional.relu(self.fc1(x))
        x = functional.relu(self.fc2(x))
        # Final layer without ReLU as it's going into a softmax (done by CrossEntropyLoss)
        x = self.fc3(x)
        #x = x.view(-1, self._flat_features_size)
        return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = SceneClassificationCNN().to(device)
print(model)

optimizer = optim.Adam(model.parameters(), lr=0.001)
# Learning rate can be adjusted
# I'm used to the Adam optimizer
The output of this code is:
Using device: cpu
Dummy output size: torch.Size([1, 64, 30, 45])
SceneClassificationCNN(
(conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=86400, out_features=512, bias=True)
(fc2): Linear(in_features=512, out_features=128, bias=True)
(fc3): Linear(in_features=128, out_features=5, bias=True)
)
Although I have this CNN model in my code, it kept raising errors, so I rewrote it as a class called NeuralNetwork_Conv. Unfortunately, that gives me the same (or a very similar) problem. Here is the NeuralNetwork_Conv class:
class NeuralNetwork_Conv(nn.Module):
    def __init__(self):
        super(NeuralNetwork_Conv, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        # Placeholder for the linear layers - adjust the size accordingly
        self.fc1 = nn.Linear(64 * 30 * 45, 512)

    def forward(self, x):
        x = self.conv1(x)
        print("After conv1:", x.size())
        x = self.pool1(x)
        print("After pool1:", x.size())
        x = self.conv2(x)
        print("After conv2:", x.size())
        x = self.pool2(x)
        print("After pool2:", x.size())
        x = self.conv3(x)
        print("After conv3:", x.size())
        x = self.pool3(x)
        print("After pool3:", x.size())
        x = x.view(-1, 64 * 30 * 45)
        x = self.fc1(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = NeuralNetwork_Conv().to(device)
print(model)

optimizer = optim.Adam(model.parameters(), lr=0.001)

# Test the forward pass with a dummy input to observe sizes
dummy_input = torch.randn(1, 3, 240, 360).to(device)  # Example input size, adjust as necessary
model(dummy_input)
Here are the two outputs I get from the code above:
Using device: cpu
NeuralNetwork_Conv(
(conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=86400, out_features=512, bias=True)
)
After conv1: torch.Size([1, 16, 240, 360])
After pool1: torch.Size([1, 16, 120, 180])
After conv2: torch.Size([1, 32, 120, 180])
After pool2: torch.Size([1, 32, 60, 90])
After conv3: torch.Size([1, 64, 60, 90])
After pool3: torch.Size([1, 64, 30, 45])
And the other output:
tensor([[-6.6583e-02, 7.8844e-02, -1.4249e-01, 6.1643e-02, -7.0408e-02,
4.2068e-02, -4.0336e-02, -1.9950e-01, -7.1389e-02, -1.3796e-01,
9.3257e-02, 1.9345e-01, 4.6931e-03, 3.6373e-01, -1.1949e-01,
-1.6199e-01, 1.3884e-01, 2.8671e-02, 5.3386e-02, 6.5578e-02,
1.8842e-01, -5.9504e-02, -1.8027e-01, -8.8520e-02, -2.0062e-02,
-1.5458e-01, -1.1620e-01, 2.5340e-02, 1.2343e-01, -1.9141e-01,
3.7330e-02, -1.5329e-02, 4.5059e-02, 1.3514e-01, -1.5895e-02,
-1.0641e-01, 1.5021e-02, 6.7946e-02, -1.0390e-01, -5.0331e-02,
1.2298e-02, -1.4691e-01, 2.8895e-01, -3.1915e-02, 7.0279e-02,
5.5356e-02, -6.9813e-02, 3.4496e-02, 2.0414e-02, 2.7333e-02,
-1.3051e-02, -3.4270e-02, 2.5378e-01, -5.9996e-02, -1.2498e-01,
-1.7903e-02, -1.8139e-02, -1.8555e-01, 3.4394e-01, -7.8244e-02,
-5.8675e-02, -2.3361e-02, -1.1856e-01, 1.4826e-02, 3.0094e-01,
-1.0646e-01, 1.1270e-01, -1.1333e-01, 1.5593e-02, -8.6904e-02,
2.7338e-02, -2.3950e-01, 1.0691e-01, -1.0705e-01, 8.4953e-02,
3.5296e-02, 1.2546e-01, 5.4389e-02, 1.8928e-01, 1.1250e-01,
-1.0872e-02, 1.6038e-02, 7.3459e-02, -1.6390e-02, 4.3814e-02,
-4.8063e-02, 4.9660e-02, -4.8125e-02, -4.4857e-02, 6.9098e-03,
1.5525e-01, 2.5897e-01, 1.1529e-01, 3.8345e-02, 1.3227e-01,
-9.4112e-02, -1.5524e-01, -4.4205e-02, -1.4851e-01, 1.4226e-01,
-1.9989e-01, 9.4048e-02, 3.0361e-01, 3.6938e-02, -1.9737e-01,
-4.3552e-02, -2.6977e-01, 4.0547e-02, -2.5311e-01, 1.7712e-01,
1.1568e-01, 7.7897e-02, 1.3156e-01, -3.6716e-02, -1.8810e-01,
-4.5886e-02, -1.1666e-01, -2.7934e-02, 4.2246e-01, 6.5305e-02,
6.8844e-02, 2.9094e-01, 1.5930e-01, 2.3336e-01, -1.0987e-01,
2.6732e-01, -7.9896e-02, 3.4161e-01, 2.8448e-01, 2.7811e-01,
-2.8356e-01, -1.4066e-01, 1.7005e-01, -1.6456e-01, -1.2238e-01,
7.5385e-02, -8.2953e-02, 5.1057e-02, -1.2672e-02, 9.9285e-02,
2.3554e-02, -1.4845e-03, -3.4829e-02, 3.6016e-01, 2.6877e-01,
1.3761e-01, -6.4589e-02, 4.6149e-02, 4.3486e-02, -1.7645e-01,
6.2570e-02, -7.3389e-02, -1.4512e-02, -1.4758e-01, -2.1920e-01,
1.7140e-02, -3.2040e-02, 1.0936e-01, 8.7229e-02, 8.8445e-02,
-1.9883e-01, 2.1703e-01, -1.7173e-01, 2.7462e-01, 1.2990e-01,
1.5235e-01, 1.9545e-02, 1.1720e-02, 1.9312e-01, 5.9859e-02,
-8.0287e-02, 1.0102e-01, -3.9349e-01, 1.4303e-01, -1.2415e-01,
-8.5000e-03, 1.5991e-01, -3.3937e-01, -1.2435e-01, 1.8382e-01,
4.4852e-02, 1.4590e-01, 1.8493e-01, 6.3306e-02, 6.4110e-02,
1.8389e-02, -7.8453e-02, -3.3310e-02, 1.2160e-03, -1.4169e-01,
-3.0171e-01, 1.4754e-01, -1.0948e-01, 9.7101e-02, 2.1271e-02,
3.7804e-02, -8.1400e-04, -1.0619e-01, -3.3092e-02, 7.3220e-02,
-3.5862e-03, 4.1970e-02, 1.5572e-02, 1.8815e-02, 8.1993e-02,
1.2919e-01, -5.1420e-02, 4.7143e-02, -1.0359e-01, -1.0286e-01,
1.6066e-02, 9.2730e-02, -5.8958e-02, -5.0492e-03, 1.0211e-01,
-5.6311e-02, -1.2885e-01, -2.0300e-02, 1.7353e-02, -1.2325e-01,
-5.6017e-02, -2.7655e-01, -9.5620e-02, -2.2155e-02, 2.1870e-01,
2.3230e-01, -3.0554e-05, 1.1196e-01, -1.3114e-01, 8.9631e-02,
-1.3647e-01, -4.4391e-02, -1.9639e-02, -1.0989e-01, 1.0549e-01,
-4.2130e-02, -4.3476e-01, 1.6702e-01, -3.2339e-02, 2.6870e-01,
-4.5465e-02, 2.5549e-01, 1.0326e-01, 9.0097e-02, -2.4702e-02,
1.8878e-01, -3.2149e-01, -5.7085e-03, 4.5387e-04, 4.2761e-02,
1.9696e-01, 2.3717e-01, 1.1287e-01, -2.7148e-01, -2.7781e-01,
9.4704e-02, 1.1562e-02, 3.0118e-02, 2.2923e-01, 1.9578e-02,
-2.5807e-01, -1.8138e-01, -2.7152e-01, -2.2853e-02, -2.0886e-02,
1.4479e-01, 1.0336e-01, 1.4169e-01, 1.0363e-01, 1.0341e-01,
-4.4898e-02, 6.4268e-02, -9.0907e-02, -3.9338e-01, -7.2397e-02,
-8.9581e-03, 1.4663e-01, 8.2005e-02, 2.8790e-01, -1.3653e-02,
-1.2523e-01, -1.6508e-01, -1.5815e-01, 5.4398e-02, 1.4715e-01,
-2.4775e-01, -2.0883e-01, -1.9139e-02, 8.6897e-02, 6.9590e-02,
7.5575e-02, -7.3747e-02, -1.4537e-02, -2.0008e-01, -1.5837e-01,
3.1999e-01, -4.9494e-02, 7.5654e-02, -1.5142e-01, -6.2533e-02,
-2.1078e-01, 6.5765e-02, -4.1795e-02, -1.0766e-03, 8.2803e-02,
1.0490e-02, -1.6136e-02, -1.2969e-01, -1.2275e-01, -4.1438e-03,
2.3278e-01, 7.1578e-02, 2.1670e-01, 4.9445e-02, -1.9228e-01,
2.2035e-01, -8.2743e-02, -2.4098e-01, -6.4845e-02, -2.6872e-02,
1.2220e-02, -8.4460e-03, 1.8067e-01, -3.4670e-01, -5.8514e-03,
-3.3068e-01, -1.2138e-01, 9.7391e-02, 1.0332e-01, 6.1555e-02,
3.6117e-02, 5.2334e-02, -3.7863e-02, 1.3306e-01, -4.3676e-02,
2.2926e-01, -1.3721e-01, 2.1945e-01, 2.4629e-01, -1.4614e-01,
-1.6835e-01, -1.1987e-01, -1.4246e-01, -1.5590e-01, 3.7477e-02,
-6.6188e-02, 4.5236e-02, -2.4773e-01, -1.5510e-01, 1.9009e-01,
-2.2781e-02, -7.3059e-02, 1.8916e-01, 1.3230e-01, 4.3261e-02,
4.6859e-02, -4.9147e-01, 5.7847e-02, -1.4865e-01, 5.6057e-03,
-2.0584e-01, 1.8284e-01, -2.1993e-01, 9.4927e-02, -5.8686e-03,
-2.8759e-02, -3.9250e-02, -2.2812e-01, 2.1872e-01, -1.2176e-01,
-1.0005e-01, 1.0752e-01, 4.7323e-02, 6.5562e-02, 1.0710e-01,
-1.3260e-02, -2.4615e-02, -4.3050e-02, -8.8642e-02, -1.0462e-01,
-1.7764e-01, 2.9332e-02, -8.1576e-03, 3.0675e-01, -2.8505e-02,
3.3142e-02, 8.2312e-02, 9.8706e-03, 4.1091e-05, -1.1159e-01,
-2.8545e-02, -1.7685e-01, -1.1629e-01, -5.1686e-02, 3.2084e-01,
1.8053e-01, 8.7533e-02, -2.5515e-01, -1.8066e-01, 2.7144e-02,
-1.4081e-02, 3.9089e-01, 3.0634e-02, 7.5020e-02, -8.9916e-02,
3.2381e-01, -2.6234e-01, -3.0246e-01, -4.7191e-02, 1.4518e-02,
-2.3411e-01, 1.5552e-02, -4.8474e-01, -1.2143e-01, 3.4133e-01,
6.5519e-02, -1.6780e-02, 1.1881e-01, -8.4260e-02, -5.1083e-02,
3.3583e-02, 2.8465e-01, 1.0361e-01, -3.9145e-02, -1.7361e-01,
3.6966e-02, -1.5067e-01, -6.7469e-02, -1.1069e-01, -1.0071e-02,
1.4910e-02, 2.9178e-02, -6.6805e-02, -2.0691e-01, 1.3508e-01,
-1.4120e-01, -2.4821e-01, 1.4179e-01, -1.3962e-02, -8.2849e-02,
3.3553e-02, -1.4514e-02, 3.2578e-01, 1.3230e-01, -1.4331e-01,
1.3124e-01, 1.5074e-01, -1.1892e-01, 9.2938e-02, -2.2535e-01,
1.2779e-01, 2.1662e-01, 1.0649e-01, -4.0648e-02, 2.7221e-01,
-3.0154e-01, 1.2282e-01, -5.9368e-02, 3.2799e-02, 1.5916e-01,
1.9251e-02, 6.4609e-02, -6.8788e-02, -9.1684e-02, 8.4554e-02,
-2.5653e-01, -1.7326e-02, -5.9050e-03, 3.8589e-02, -2.9320e-01,
2.7190e-01, -1.9769e-01, -5.7632e-02, -1.2614e-01, -1.4066e-01,
1.3740e-01, -7.1185e-02, 1.1509e-01, 3.1625e-02, -3.8954e-03,
-1.9939e-01, 1.2890e-01, -2.6241e-01, -4.1080e-02, 1.1835e-01,
3.9253e-03, 1.3970e-01, 1.3278e-02, 1.5135e-01, -4.2120e-02,
5.5194e-03, 8.3199e-03, 2.9458e-02, -1.9543e-01, 2.7979e-02,
8.2800e-02, 2.0363e-01, 9.0182e-02, 1.2844e-01, -6.9860e-02,
-1.9727e-01, 2.9260e-01, -1.2800e-01, -9.0905e-02, -2.9702e-01,
-2.5170e-02, 2.0275e-02, -8.5073e-02, 7.5998e-02, -8.6023e-02,
4.3336e-02, 2.0305e-01]], grad_fn=<AddmmBackward0>)
Up to this point everything looks fine, until I run my training code:
# Consider flattening
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss, correct = 0, 0
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)
        # Compute prediction and loss
        pred = model(x)
        loss = loss_fn(pred, y)
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= num_batches
    correct /= size
    print(f"Train Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {train_loss:>8f} \n")

def validate_model(validation_dataloader, model, loss_fn):
    size = len(validation_dataloader.dataset)
    num_batches = len(validation_dataloader)
    model.eval()
    val_loss, correct = 0, 0
    with torch.no_grad():
        for x, y in validation_dataloader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            val_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    val_loss /= num_batches
    correct /= size
    print(f"Validation Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {val_loss:>8f} \n")
    return 100*correct

best_val_accuracy = 0
model_path = 'best_model.pth'
epochs = 50
batch_size = 32

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    val_accuracy = validate_model(valid_dataloader, model, loss_fn)
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), model_path)
        print(f"New best model saved at epoch {epoch+1} with validation accuracy: {best_val_accuracy:.2f}%")

print("Training and validation completed. Best model saved to 'best_model.pth'")
# Change the predictions to accuracy
This is the output I'm currently getting (the part that does run):
Epoch 1
-------------------------------
After conv1: torch.Size([32, 16, 60, 90])
After pool1: torch.Size([32, 16, 30, 45])
After conv2: torch.Size([32, 32, 30, 45])
After pool2: torch.Size([32, 32, 15, 22])
After conv3: torch.Size([32, 64, 15, 22])
After pool3: torch.Size([32, 64, 7, 11])
And I am getting a Runtime Error each time:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[515], line 50
48 for epoch in range(epochs):
49 print(f"Epoch {epoch+1}\n-------------------------------")
---> 50 train_loop(train_dataloader, model, loss_fn, optimizer)
51 val_accuracy = validate_model(valid_dataloader, model, loss_fn)
52 if val_accuracy > best_val_accuracy:
Cell In[515], line 11
8 x, y = x.to(device), y.to(device)
10 # Compute prediction and loss
---> 11 pred = model(x)
12 loss = loss_fn(pred, y)
13 train_loss += loss.item()
File ~\AppData\Roaming\Python\Python311\site-packages\torch\nn\modules\module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~\AppData\Roaming\Python\Python311\site-packages\torch\nn\modules\module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
...
---> 29 x = x.view(-1, 64 * 30 * 45) # Adjust this based on the actual size after pool3
30 x = self.fc1(x)
31 return x
RuntimeError: shape '[-1, 86400]' is invalid for input of size 157696
The problem is probably the shapes, or that mat1 and mat2 cannot be multiplied. Can anyone help me? I can provide more details if needed. Also, sorry for the long post; I just wanted to include as much information as possible.
Thanks everyone for your help and attention.
1 Answer
I think the final flatten is using the wrong dimensions. The line x.view(-1, 64 * 30 * 45) assumes each sample flattens to 86400 values, but your own shape prints show that after pool3 the tensor is [32, 64, 7, 11], i.e. 64 * 7 * 11 = 4928 values per sample (32 * 4928 = 157696 in total, which is the number in the error message). In other words, the images coming out of your training dataloader are apparently 60x90 rather than 240x360, so the hard-coded size no longer matches.
Try this modification, which takes the dimensions from the actual tensor:
        ...
        x = self.pool3(x)
        print("After pool3:", x.size())
        # Modifications start here.
        # Get the dimensions of x
        B, C, H, W = x.shape
        # Calculate the flattened size
        flattened_length = C * H * W
        # Reshape to flat
        x = x.view(-1, flattened_length)
        x = self.fc1(x)
        return x
This also means that fc1 has to be constructed with an in_features that matches the flattened size of x, i.e. C * H * W (4928 here, for 60x90 inputs), instead of the hard-coded 64 * 30 * 45.
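If you want to avoid hard-coding that number at all, below is a minimal sketch of the same idea as the _get_conv_output_size helper in your first class. It is not your exact code: the img_height=60, img_width=90 defaults and num_classes=5 are assumptions taken from the shapes in your output, so adjust them to whatever your transforms actually produce (nn.LazyLinear(512) is another option if your PyTorch version has it):

import torch
import torch.nn as nn
from torch.nn import functional

class NeuralNetwork_Conv(nn.Module):
    def __init__(self, img_height=60, img_width=90, num_classes=5):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Run one dummy tensor through the conv/pool stack so fc1's
        # in_features always matches the real flattened size.
        with torch.no_grad():
            dummy = self._features(torch.zeros(1, 3, img_height, img_width))
        self._flat_size = dummy.numel()  # 64 * 7 * 11 = 4928 for 60x90 inputs
        self.fc1 = nn.Linear(self._flat_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def _features(self, x):
        x = self.pool(functional.relu(self.conv1(x)))
        x = self.pool(functional.relu(self.conv2(x)))
        x = self.pool(functional.relu(self.conv3(x)))
        return x

    def forward(self, x):
        x = self._features(x)
        x = x.view(x.size(0), -1)  # flatten per sample, keep the batch dimension
        x = functional.relu(self.fc1(x))
        return self.fc2(x)

Using x.view(x.size(0), -1) instead of x.view(-1, hard_coded_size) also means a mismatch shows up as a clear shape error at fc1 instead of being silently absorbed into the batch dimension.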