Loss dimension problem in PyTorch (sequence-to-label learning)

I'm working on a sequence-to-label learning model in PyTorch. I have two sentences and I am classifying whether or not they are entailed (SNLI dataset). I concatenate the two 50-word sentences (padded where necessary) into a single vector of length 100. I then send mini-batches through word embeddings -> LSTM -> Linear layer. I am using cross entropy loss, but I need a [mini_batch, C] tensor to pass into the cross entropy function. Instead my tensor still has the 100 words in it, i.e. [mini_batch, 100, C].
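
For reference, a minimal sketch (not part of the original post, assuming a recent PyTorch) of the shapes nn.CrossEntropyLoss expects, which is exactly where the mismatch comes from:

import torch
import torch.nn as nn

# nn.CrossEntropyLoss wants one score vector per example, [mini_batch, C],
# and one integer class label per example, [mini_batch].
criterion = nn.CrossEntropyLoss()

scores = torch.randn(3, 3)           # [mini_batch, C]
targets = torch.tensor([0, 2, 1])    # [mini_batch]
print(criterion(scores, targets))    # works

seq_scores = torch.randn(3, 100, 3)  # [mini_batch, 100, C]: one score vector per word
# criterion(seq_scores, targets)     # fails: per-word scores vs. per-sentence labels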

Here is my model:

class myLSTM(nn.Module):
    def __init__(self, h_size=128, v_size=10, embed_d=300, mlp_d=256):
        super(myLSTM, self).__init__()
        self.embedding = nn.Embedding(v_size, embed_d)
        self.lstm = nn.LSTM(embed_d, h_size, num_layers=1, bidirectional=True, batch_first=True)
        self.mlp = nn.Linear(mlp_d, 1024)

        # Set static embedding vectors
        self.embedding.weight.requires_grad = False

        #self.sm = nn.CrossEntropyLoss()

    def display(self):
        for param in self.parameters():
            print(param.data.size())

    def filter_params(self):
        # Might not be compatible with python 3
        #self.parameters = filter(lambda p: p.requires_grad, self.parameters())
        pass

    def init_hidden(self):
        # Need to init hidden weights in LSTM
        pass

    def forward(self, sentence):
        print(sentence.size())
        embeds = self.embedding(sentence)
        print(embeds.size())
        out, _ = self.lstm(embeds)
        print(out.size())
        out = self.mlp(out)
        return out

The output from my training loop:

Epoch 0/50: 0.0%
torch.Size([3, 100])
torch.Size([3, 100, 300])
torch.Size([3, 100, 256])
Output size: 
torch.Size([3, 100, 1024])
Target size: 
torch.Size([3])

Error:

ValueError: Expected 2 or 4 dimensions (got 3)
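
One common way to get from per-word LSTM output down to a single vector per example is to keep only the last time step before the linear layer. A shape-only sketch (not necessarily the fix adopted later in this thread):

import torch

out = torch.randn(3, 100, 256)   # [mini_batch, seq_len, 2 * h_size] from the bidirectional LSTM
last = out[:, -1, :]             # [mini_batch, 2 * h_size]: the output after the last word
print(last.size())               # torch.Size([3, 256]); a Linear layer on this gives [mini_batch, C]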

EDIT ------------------------------------------------------------

I now have the model training, but my accuracy is quite low. Is there a problem with the way my LSTM hidden states are concatenated and then squashed into a smaller tensor before going through the linear layer?

New model:

class myLSTM(nn.Module):
    def __init__(self, h_size=128, v_size=10, embed_d=300, mlp_d=256, num_classes=3, lstm_layers=1):
        super(myLSTM, self).__init__()
        self.num_layers = lstm_layers
        self.hidden_size = h_size
        self.embedding = nn.Embedding(v_size, embed_d)
        self.lstm = nn.LSTM(embed_d, h_size, num_layers=lstm_layers, bidirectional=True, batch_first=True)
        self.mlp = nn.Linear(2 * h_size * 2, num_classes)

        # Set static embedding vectors
        self.embedding.weight.requires_grad = False

    def forward(self, s1, s2):
        # Set initial states
        #h0 = Variable(torch.zeros(self.num_layers*2, s1.size(0), self.hidden_size)).cuda() # 2 for bidirection 
        #c0 = Variable(torch.zeros(self.num_layers*2, s1.size(0), self.hidden_size)).cuda()

        batch_size = s1.size()[0]
        embeds_1 = self.embedding(s1)
        embeds_2 = self.embedding(s2)
        _, (h_1_last, _) = self.lstm(embeds_1)#, (h0, c0)) #note the change here. Last hidden state is taken
        _, (h_2_last, _) = self.lstm(embeds_2)#, (h0, c0))
        concat = torch.cat( (h_1_last, h_2_last), dim=2) #double check the dimension
        concat = concat.view(batch_size, -1)
        scores = self.mlp(concat)
        return scores

New training:

from torch import optim
from torch.autograd import Variable

batch_size = 64
SGD_optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001, weight_decay=1e-4)

criterion = nn.CrossEntropyLoss()
num_epochs = 10
model.train()

if cuda:
    model = model.cuda()
    criterion = criterion.cuda()

epoch_losses = []

for epoch in range(num_epochs):
    print("Epoch {0}/{1}: {2}%".format(epoch, num_epochs, 100*float(epoch)/num_epochs))

    # Batch loss aggregator
    losses = []

    for start, end in tqdm(batch_index_gen(batch_size, len(n_data))):
        # Convert minibatch to numpy
        s1, s2, y = convert_to_numpy(n_data[start:end])

        # Convert numpy to Tensor
        s1_tensor = torch.from_numpy(s1).type(torch.LongTensor)
        s2_tensor = torch.from_numpy(s2).type(torch.LongTensor)
        target_tensor = torch.from_numpy(y).type(torch.LongTensor)

        s1 = Variable(s1_tensor)
        s2 = Variable(s2_tensor)
        target = Variable(target_tensor)

        if cuda:
            s1 = s1.cuda()
            s2 = s2.cuda()
            target = target.cuda()

        # Zero gradients
        SGD_optimizer.zero_grad()

        # Forward Pass
        output = model.forward(s1,s2) 

        # Calculate loss with respect to training labels
        loss = criterion(output, target)
        losses.append(loss.data[0])

        # Backpropagate and update optimizer
        loss.backward()
        SGD_optimizer.step()

    # concat losses to epoch losses
    epoch_losses += losses
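
The helpers batch_index_gen and convert_to_numpy are not shown in the post. Purely as a guess at the interface the loop assumes, batch_index_gen might look something like this (hypothetical, not the poster's code):

def batch_index_gen(batch_size, n_examples):
    # Yield (start, end) slice bounds that cover n_examples in chunks of batch_size.
    for start in range(0, n_examples, batch_size):
        yield start, min(start + batch_size, n_examples)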

Training output with the tensor sizes printed:

Epoch 0/10: 0.0%
Batch size: 64
Sentences
torch.Size([64, 50])
torch.Size([64, 50])
torch.Size([64, 50, 300])
torch.Size([64, 50, 300])
Hidden states
torch.Size([2, 64, 128])
torch.Size([2, 64, 128])
Concatenated hidden states
torch.Size([2, 64, 256])
Reshaped tensors for linear layer
torch.Size([64, 512])
Linear propogation
torch.Size([64, 3])
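
As a standalone check (not from the post), the sketch below reproduces those hidden-state shapes: h_n comes back as (num_layers * num_directions, batch, hidden_size) even with batch_first=True, so the batch dimension has to be moved to the front before flattening one vector per example:

import torch
import torch.nn as nn

lstm = nn.LSTM(300, 128, num_layers=1, bidirectional=True, batch_first=True)
x = torch.randn(64, 50, 300)      # [batch, seq_len, embed_d], matching the sizes printed above
_, (h_n, _) = lstm(x)
print(h_n.size())                 # torch.Size([2, 64, 128]): (directions, batch, hidden)

# Move the batch dimension to the front before flattening per example.
flat = h_n.transpose(0, 1).contiguous().view(64, -1)
print(flat.size())                # torch.Size([64, 256])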

Evaluation:

def eval_model(model, mode='dev'):
    file_name = 'snli_1.0/snli_1.0_dev.jsonl' if mode == 'dev' else 'snli_1.0/snli_1.0_test.jsonl'

    dev_data, _ = obtain_data(file_name)
    dev_n_data = vocab.process_data(dev_data)

    print("Length of data: {}".format(len(dev_n_data)))

    eval_batch_size = 1024
    model.eval()

    total = len(dev_n_data)
    hit = 0
    correct = 0

    # Batch dev eval
    for start, end in batch_index_gen(eval_batch_size, len(dev_n_data)):

        s1, s2, y = convert_to_numpy(dev_n_data[start:end])

        s1_tensor = torch.from_numpy(s1).type(torch.LongTensor)
        s2_tensor = torch.from_numpy(s2).type(torch.LongTensor)
        target_tensor = torch.from_numpy(y).type(torch.LongTensor)

        s1 = Variable(s1_tensor, volatile=True)
        s2 = Variable(s2_tensor, volatile=True)
        target = Variable(target_tensor, volatile=True)

        if cuda:
            s1 = s1.cuda()
            s2 = s2.cuda()
            target = target.cuda()

        output = model.forward(s1,s2)
        loss = criterion(output, target)

        #print("output size: {}".format(output.size()))
        #print("target size: {}".format(target.size()))
        pred = output.data.max(1)[1] # get the index of the max log-probability
        #print(pred[:5])
        #print(output[:])
        correct += pred.eq(target.data).cpu().sum()

    return correct / float(total)

eval_model(model)
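
A side note on Variable(..., volatile=True): from PyTorch 0.4 onwards the volatile flag is ignored, and the equivalent way to switch off gradient tracking during evaluation is torch.no_grad(). A minimal self-contained sketch (stand-in module, not the model from the post):

import torch
import torch.nn as nn

model = nn.Linear(10, 3)      # stand-in module, just to keep the example runnable
inputs = torch.randn(4, 10)

model.eval()
with torch.no_grad():         # no autograd graph is built inside this block
    scores = model(inputs)
print(scores.size())          # torch.Size([4, 3])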

1 Answer

I think that what you are trying to solve is, essentially, an entailment problem.

Perhaps you could do something like this:

  1. Design your module to take two sentences as input
  2. Embed both of them with your embedding layer
  3. Encode each of them with the LSTM module
  4. Now you have a fixed-length vector representation of each sentence. The simplest thing to do is to just concatenate them together
  5. Add a linear layer on top to compute a score for each entailment class (3 of them, I believe)
  6. Apply softmax to get a proper probability distribution

So your model could look something like this (double-check the dimensions):

import torch
import torch.nn as nn
import torch.nn.functional as F

class myLSTM(nn.Module):
    def __init__(self, h_size=128, v_size=10, embed_d=300, num_classes=3):
        super(myLSTM, self).__init__()
        self.embedding = nn.Embedding(v_size, embed_d)
        self.lstm = nn.LSTM(embed_d, h_size, num_layers=1, bidirectional=True, batch_first=True)
        self.mlp = nn.Linear(2 * h_size * 2, num_classes)  # <- change here: 2 directions x 2 sentences

    def forward(self, sentence1, sentence2):
        batch_size = sentence1.size(0)
        embeds_1 = self.embedding(sentence1)
        embeds_2 = self.embedding(sentence2)
        _, (h_1_last, _) = self.lstm(embeds_1)  # note the change here: the last hidden state is taken
        _, (h_2_last, _) = self.lstm(embeds_2)
        # h_*_last is [num_directions, batch, h_size]; concatenate, then flatten one vector per example
        concat = torch.cat([h_1_last, h_2_last], dim=2)
        concat = concat.transpose(0, 1).contiguous().view(batch_size, -1)
        scores = self.mlp(concat)
        probas = F.softmax(scores, dim=1)  # from torch.nn.functional
        return probas
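
A quick shape check of the sketch above (hypothetical batch of 4 with the toy vocabulary size of 10; not from the answer):

model = myLSTM(h_size=128, v_size=10, embed_d=300, num_classes=3)
s1 = torch.randint(0, 10, (4, 50))   # [batch, seq_len] of word indices
s2 = torch.randint(0, 10, (4, 50))
print(model(s1, s2).size())          # expected: torch.Size([4, 3])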

Then you can experiment with adding more hidden layers, or think about how to combine the two sentences in a smarter way (attention, etc.). Also double-check what CrossEntropyLoss accepts as input and target and adjust your code accordingly (does it want unnormalized class scores or a probability distribution?). Look at the LSTM module documentation at http://pytorch.org/docs/master/nn.html#lstm to be clear about what the LSTM returns (do you need the hidden state for every word, or only the representation after the last one?).
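
On that last point, a minimal check (not from the answer) showing that nn.CrossEntropyLoss applies log-softmax internally, so it should be given the unnormalized scores from the linear layer rather than softmax outputs:

import torch
import torch.nn as nn
import torch.nn.functional as F

scores = torch.randn(4, 3)                       # raw class scores, [batch, num_classes]
target = torch.tensor([0, 2, 1, 1])              # class indices, [batch]

loss_builtin = nn.CrossEntropyLoss()(scores, target)
loss_manual = F.nll_loss(F.log_softmax(scores, dim=1), target)
print(loss_builtin.item(), loss_manual.item())   # the two values match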
