PyTorch LSTM not using hidden state

0 votes
1 answer
29 views
Asked 2025-04-13 18:19

I'm using PyTorch's LSTM interface and running into a problem. I'm building a simple AI model with an LSTM. The model's task is: return 1 if the previous number is less than the current number.

For example, for the array [0.7, 0.3, 0.9, 0.99], the expected output is [1.0, 0.0, 1.0, 1.0]. The first output should always be 1.0, no matter what.
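
To make the expected output concrete, here is the labeling rule written out in plain Python (a minimal sketch, separate from the model code below):

a = [0.7, 0.3, 0.9, 0.99]

# 1.0 wherever the previous number is smaller; the first position is always 1.0
expected = [1.0] + [1.0 if a[i - 1] < a[i] else 0.0 for i in range(1, len(a))]
print(expected)  # [1.0, 0.0, 1.0, 1.0]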

To solve this, I designed the following network:

# network.py

import torch

N_INPUT = 1
N_STACKS = 1
N_HIDDEN = 3

LR = 0.001


class Network(torch.nn.Module):

    # params: self
    def __init__(self):
        super(Network, self).__init__()

        self.lstm = torch.nn.LSTM(
            input_size=N_INPUT,
            hidden_size=N_HIDDEN,
            num_layers=N_STACKS,
        )

        self.linear = torch.nn.Linear(N_HIDDEN, 1)
        self.relu = torch.nn.ReLU()

        self.optim = torch.optim.Adam(self.parameters(), lr=LR)
        self.loss = torch.nn.MSELoss()

    # params: self, predicted, expecteds
    def backprop(self, xs, es):

        # perform backprop
        self.optim.zero_grad()
        l = self.loss(xs, torch.tensor(es))
        l.backward()
        self.optim.step()

        return l

    # params: self, data (as a python array)
    def forward(self, dat):

        out, _ = self.lstm(torch.tensor(dat))

        out = self.relu(out)
        out = self.linear(out)

        return out
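
For reference, since N_INPUT = 1 and batch_first defaults to False, nn.LSTM treats a 2-D input as an unbatched (seq_len, input_size) sequence, so torch.tensor(dat) with shape (1000, 1) is read as one 1000-step sequence of 1-dimensional inputs:

import torch

lstm = torch.nn.LSTM(input_size=1, hidden_size=3, num_layers=1)
out, _ = lstm(torch.rand(1000, 1))  # unbatched input: (seq_len, input_size)
print(out.shape)  # torch.Size([1000, 3])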

I call this network from the following file:

# main.py

import network

import numpy as np

# create a new network
n: network.Network = network.Network()


# create some data
def rand_array():

    # a bunch of random numbers
    a = [[np.random.uniform(0, 1)] for i in range(1000)]

    # now, our expected value is 0 if the previous number is greater, and 1 else
    expected = [0.0 if a[i - 1][0] > a[i][0] else 1.0 for i in range(len(a))]
    expected[0] = 1.0  # make the first element always just 1.0

    return [a, expected]


# a bunch of random arrays
data = [rand_array() for i in range(1000)]

# 100 epochs
for epoch in range(100):
    for d in data:

        pred = n(d[0])
        loss = n.backprop(pred, d[1])
        print("Loss: {:.5f}".format(loss))

Now, when I run this program, the loss hovers around 0.25 and stops changing once it gets there. I suspect the model is just outputting the average of 0 and 1 (0.5) for every input.

This makes me think the model can't see the earlier data; the inputs are just random numbers (though the expected outputs are derived from them), and the model can't remember what came before.
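
That suspicion is consistent with the arithmetic: against 0/1 targets, a constant prediction of 0.5 is off by exactly 0.5 everywhere, so the MSE is exactly 0.25 regardless of how the labels are balanced. A quick check:

import torch

labels = torch.randint(0, 2, (1000,)).float()  # 0/1 targets
constant = torch.full((1000,), 0.5)            # always predict 0.5
print(torch.nn.functional.mse_loss(constant, labels))  # tensor(0.2500)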

What is the problem I'm running into?

1 Answer

1

I don't see a hidden-state problem. The setup you're using is a bit unusual, though, and that may be hurting learning.

First, we need a proper dataset and dataloader.

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch.nn.functional as F

class RandDataset(Dataset):
    def __init__(self, sequence_length):
        self.sequence_length = sequence_length
    
    def __len__(self):
        return 10000 # data is generated so length is arbitrary 
    
    def __getitem__(self, idx):
        sequence = torch.rand(self.sequence_length)
        labels = torch.ones_like(sequence)
        labels[1:] = sequence[:-1] < sequence[1:]  # 1.0 where the previous value is smaller; position 0 stays 1.0
        
        sequence = sequence[None,:,None] # shape (1, sequence_length, 1)
        labels = labels[None,:] # shape (1, sequence_length)
        
        return sequence, labels

def collate_fn(batch):
    sequences = torch.cat([i[0] for i in batch])
    labels = torch.cat([i[1] for i in batch])
    return sequences, labels

dataset = RandDataset(1000)
dataloader = DataLoader(dataset, batch_size=32, collate_fn=collate_fn)
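
As a quick sanity check (using the definitions above), one batch from the loader should be a (batch, seq, feature) tensor of sequences and a matching (batch, seq) tensor of labels:

seqs, labs = next(iter(dataloader))
print(seqs.shape)  # torch.Size([32, 1000, 1])
print(labs.shape)  # torch.Size([32, 1000])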

Next, the model. Add an input projection layer, and drop the unnecessary relu activation after the LSTM module.

class LSTMModel(nn.Module):
    def __init__(self, d_in, d_proj, d_hidden, n_layers):
        super().__init__()
        
        self.input_layer = nn.Linear(d_in, d_proj)
        self.lstm = nn.LSTM(input_size=d_proj, hidden_size=d_hidden, num_layers=n_layers, batch_first=True)
        self.output_layer = nn.Linear(d_hidden, 1)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = F.relu(x)
        
        x, _ = self.lstm(x)
        
        x = self.output_layer(x)
        return x

model = LSTMModel(1, 32, 64, 2)
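
A quick shape check on the freshly constructed model (still on the CPU at this point):

x = torch.rand(2, 10, 1)   # (batch, seq, feature)
print(model(x).shape)      # torch.Size([2, 10, 1]), one logit per timestep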

Now we train. We're predicting a binary outcome, so we should use BCEWithLogitsLoss instead of MSELoss; using MSE for a classification target doesn't make sense.
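
As an aside, BCEWithLogitsLoss folds the sigmoid into the loss itself, which is more numerically stable than applying a Sigmoid layer followed by BCELoss. A quick equivalence check:

logits = torch.randn(5)
targets = torch.randint(0, 2, (5,)).float()

a = nn.BCEWithLogitsLoss()(logits, targets)
b = nn.BCELoss()(torch.sigmoid(logits), targets)
print(torch.allclose(a, b))  # True, up to floating-point error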

device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU if no GPU is available

opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_function = nn.BCEWithLogitsLoss()

epochs = 1

model.to(device)

for epoch in range(epochs):
    for i, batch in enumerate(dataloader):
        seqs, labs = batch
        
        seqs = seqs.to(device)
        labs = labs.to(device)
        
        preds = model(seqs)
        loss = loss_function(preds.reshape(-1), labs.reshape(-1))
        
        if i%10==0:
            print(f'{loss.item():.3f}')
        
        opt.zero_grad()
        loss.backward()
        opt.step()

Once training is done, test the performance.

model.eval()
seq, lab = dataset[0]

with torch.no_grad():  # no gradient tracking needed for evaluation
    pred = model(seq.to(device)).cpu()

pred = (torch.sigmoid(pred) > 0.5).float().squeeze()
lab = lab.squeeze()

acc = (pred == lab).float().mean()
print(acc)
> tensor(0.9990)
