PyTorch RNN error: RuntimeError: input must have 3 dimensions, got 1


I am trying to train an RNN based on the code here.

I also found two similar posts, but could not figure out from them how to solve my problem: here and here.

The error is fairly self-explanatory: the model expects a 3-dimensional input, but I am only giving it 1 dimension. However, I don't know where to fix this. I know a good Stack post should contain data, but I'm not sure how to include example tensors in a post. Apologies.

My inputs are 300-d word embeddings, and my output is a one-hot encoded vector of length 11, where the model makes a classification choice across the 11 output dimensions.
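
For reference, nn.RNN with batch_first=True expects input of shape (batch, seq_len, input_size). A minimal sketch of what works and what raises this exact error (shapes chosen to match my setup, with a hypothetical batch of 4):

import torch
import torch.nn as nn

rnn = nn.RNN(input_size=300, hidden_size=100, num_layers=2, batch_first=True)

good = torch.randn(4, 1, 300)   # (batch, seq_len, input_size) -> 3 dimensions, works
out, hidden = rnn(good)
print(out.shape)                # torch.Size([4, 1, 100])

bad = torch.randn(300)          # a single 1-D embedding
# rnn(bad)                      # RuntimeError: input must have 3 dimensions, got 1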

I'll start with the data loader and work through the code from there.

from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, dat, labels):
        self.labels = labels
        self.dat = dat

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        label = self.labels[idx]
        dat = self.dat[idx]
        sample = {"Sample": dat, "Class": label}
        return sample
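
As a quick sanity check of the dataset (with made-up numpy arrays standing in for my real data), note that a single item is returned as a dict whose sample is still 1-D; it is the DataLoader that later stacks samples into batches:

import numpy as np

X = np.random.randn(10, 300).astype("float32")  # 10 fake 300-d embeddings
y = np.random.randint(0, 11, size=10)           # 10 fake class labels

dset = CustomDataset(X, y)
print(dset[0]["Sample"].shape)  # (300,) -- one sample on its own is 1-D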

I define my vanilla RNN as follows.

import torch
import torch.nn as nn

class VanillaRNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(VanillaRNN, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining the layers
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, inputs):
        
        batch_size = inputs.size(0)

        # Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(inputs, hidden)
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden
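
A quick shape check of this model on a dummy batch (hypothetical batch of 4, sequence length 1, matching the hyperparameters used further down):

model = VanillaRNN(input_size=300, output_size=11, hidden_dim=100, n_layers=2)
dummy = torch.randn(4, 1, 300)    # (batch, seq_len, input_size)
out, hidden = model(dummy)
print(out.shape, hidden.shape)    # torch.Size([4, 11]) torch.Size([2, 4, 100])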

My plotting/helper functions and training loop are shown below.

import matplotlib.pyplot as plt
import torch
import torch.nn as nn


def plot_train_val(x, train, val, train_label,
                   val_label, title, y_label,
                   color):

  plt.plot(x, train, label=train_label, color=color)
  plt.plot(x, val, label=val_label, color=color, linestyle='--')
  plt.legend(loc='lower right')
  plt.xlabel('epoch')
  plt.ylabel(y_label)
  plt.title(title)


def count_parameters(model):
  parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
  return parameters


def init_weights(m):
  if type(m) in (nn.Linear, nn.Conv1d):
    nn.init.xavier_uniform_(m.weight)



# Training function
def train(model, device, train_loader, valid_loader, epochs, learning_rate):

  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  
  train_loss, validation_loss = [], []
  train_acc, validation_acc = [], []

  for epoch in range(epochs):
    #train
    model.train()
    running_loss = 0.
    correct, total = 0, 0
    steps = 0
    for idx, batch in enumerate(train_loader):
      text = batch["Sample"].to(device)
      target = batch['Class'].to(device)
      target = torch.autograd.Variable(target).long()
      text, target = text.to(device), target.to(device)
      # add micro for coding training loop
      optimizer.zero_grad()
      output, hideden = model(text)
      print(output.shape, target.shape, target.view(-1).shape)
      loss = criterion(output, target.view(-1))
      loss.backward()
      optimizer.step()
      steps += 1
      running_loss += loss.item()

      # get accuracy
      _, predicted = torch.max(output, 1)
      print(predicted)
      #predicted = torch.round(output.squeeze())
      total += target.size(0)
      correct += (predicted == target).sum().item()

    train_loss.append(running_loss/len(train_loader))
    train_acc.append(correct/total)

    print(f'Epoch: {epoch + 1}, '
          f'Training Loss: {running_loss/len(train_loader):.4f}, '
          f'Training Accuracy: {100*correct/total: .2f}%')

    # evaluate on validation data
    model.eval()
    running_loss = 0.
    correct, total = 0, 0

    with torch.no_grad():
      for idx, batch in enumerate(valid_loader):
        text = batch["Sample"].to(device)
        print(type(text), text.shape)
        target = batch['Class'].to(device)
        target = torch.autograd.Variable(target).long()
        text, target = text.to(device), target.to(device)

        output, hidden = model(text)  # the model returns (output, hidden); unpack both here too
        
        loss = criterion(output, target)
        running_loss += loss.item()

        # get accuracy
        _, predicted = torch.max(output, 1)
        #predicted = torch.round(output.squeeze())
        total += target.size(0)
        correct += (predicted == target).sum().item()

    validation_loss.append(running_loss/len(valid_loader))
    validation_acc.append(correct/total)

    print (f'Validation Loss: {running_loss/len(valid_loader):.4f}, '
           f'Validation Accuracy: {100*correct/total: .2f}%')

  return train_loss, train_acc, validation_loss, validation_acc
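
One thing worth noting, separate from the dimension error: nn.CrossEntropyLoss expects its targets to be class indices of shape (batch,), not one-hot vectors. So if the labels really are length-11 one-hot vectors, they would need converting first; a minimal sketch of that conversion (with a hypothetical single-label batch):

import torch

one_hot = torch.tensor([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])  # hypothetical one-hot label
target = one_hot.argmax(dim=1)                                # tensor([2]) -- class index
# loss = criterion(output, target)   # output: (batch, 11), target: (batch,)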

When I run the model with the following, I get the error shown below. Thanks in advance for your help.

# Model hyperparamters
#vocab_size = len(word_array)
learning_rate = 1e-3
output_size = 11
input_size = 300
epochs = 10
hidden_dim = 100
n_layers = 2

# Initialize model, training and testing
set_seed(SEED)
vanilla_rnn_model = VanillaRNN(input_size, output_size, hidden_dim, n_layers)

#vanilla_rnn_model = VanillaRNN(output_size, input_size, RNN_size, fc_size, DEVICE)
vanilla_rnn_model.to(DEVICE)

vanilla_rnn_start_time = time.time()
vanilla_train_loss, vanilla_train_acc, vanilla_validation_loss, vanilla_validation_acc = train(vanilla_rnn_model,
                                                                                               DEVICE,
                                                                                               train_loader,
                                                                                               valid_loader,
                                                                                               epochs = epochs,
                                                                                               learning_rate = learning_rate)

The error :(

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-31-bfd2f8f3456f> in <module>()
     19                                                                                                valid_loader,
     20                                                                                                epochs = epochs,
---> 21                                                                                                learning_rate = learning_rate)
     22 print("--- Time taken to train = %s seconds ---" % (time.time() - vanilla_rnn_start_time))
     23 #test_accuracy = test(vanilla_rnn_model, DEVICE, test_iter)

6 frames
<ipython-input-30-db1fa6c8b625> in train(model, device, train_loader, valid_loader, epochs, learning_rate)
     45       # add micro for coding training loop
     46       optimizer.zero_grad()
---> 47       output, hideden = model(text)
     48       print(output.shape, target.shape, target.view(-1).shape)
     49       loss = criterion(output, target.view(-1))

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-26-c34b90b3cbc3> in forward(self, x)
     21 
     22         # Passing in the input and hidden state into the model and obtaining outputs
---> 23         out, hidden = self.rnn(x, hidden)
     24 
     25         # Reshaping the outputs such that it can be fit into the fully connected layer

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
    263         assert hx is not None
    264         input = cast(Tensor, input)
--> 265         self.check_forward_args(input, hx, batch_sizes)
    266         _impl = _rnn_impls[self.mode]
    267         if batch_sizes is None:

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
    227 
    228     def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]):
--> 229         self.check_input(input, batch_sizes)
    230         expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
    231 

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
    201             raise RuntimeError(
    202                 'input must have {} dimensions, got {}'.format(
--> 203                     expected_input_dim, input.dim()))
    204         if self.input_size != input.size(-1):
    205             raise RuntimeError(

RuntimeError: input must have 3 dimensions, got 1

1 Answer

First, you need to wrap your dataset in a proper DataLoader; you can do something like the following:

from torch.utils.data import DataLoader

# [...]

# define a batch_size, I'll use 4 as an example
batch_size = 4

train_dset = CustomDataset(X2, y)  # your current code (change train_loader to train_dset)
train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)

At this point, text should now be of shape [4, 300].

Then, you said your sequence length is equal to 1. To fix the error, you can add the length dimension with unsqueeze:

# [...]
output, hideden = model(text.unsqueeze(1))
# [...]

Now text should be [4, 1, 300], which has the 3 dimensions the RNN forward call is expecting (your RNN has batch_first=True):

input: tensor of shape (L, N, H_in) when batch_first=False or (N, L, H_in) when batch_first=True containing the features of the input sequence. (...)
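
Putting the pieces together, a minimal sketch of the suggested fix inside the existing training loop (again assuming batch_size = 4):

for batch in train_loader:
    text = batch["Sample"]        # [4, 300] after batching
    text = text.unsqueeze(1)      # [4, 1, 300] -- (batch, seq_len=1, input_size)
    output, hidden = model(text)  # now matches the (N, L, H_in) layout the RNN expects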
