我试图以 batch_size 为单位,在数据所涵盖的时间段内按顺序训练我的 LSTM。
我的问题是——在每次迭代中,我创建一个month X 2-stores
数据集(为了这篇文章),并在模型中运行它。
在开始之前,我用 init_hidden 函数对隐藏状态做一次初始化。在这种情况下,得到的张量充满了 nan;另一方面,如果我在每次迭代时都调用 init_hidden,结果就是正常的数字。
lstm模型是
# Model hyper-parameters shared by the SmallLSTM definition below.
n_features = 68  # number of input features per time step
n_steps = 3  # time steps (months) per training window
batch_size = 2  # stores processed per window
seq_len = n_steps*batch_size # total sequence length fed to the LSTM
n_hidden = 2 # number of hidden states
n_layers = 2 # number of LSTM layers (stacked)
# 2. Build the Model
# 2. Build the Model
class SmallLSTM(torch.nn.Module):
    """Stacked LSTM followed by a linear head.

    A whole window of ``n_steps * batch_size`` time steps is fed as ONE
    sequence with batch dimension 1; the LSTM output is flattened and
    projected to ``batch_size`` values (one prediction per store).
    """

    def __init__(self, n_features, seq_len, n_hidden, n_layers, n_steps, batch_size):
        super(SmallLSTM, self).__init__()
        self.n_features = n_features        # input features per time step
        self.seq_len = seq_len              # n_steps * batch_size
        self.n_hidden = n_hidden            # number of hidden states
        self.n_layers = n_layers            # number of LSTM layers (stacked)
        self.n_steps = n_steps
        self.batch_size = batch_size
        self.l_lstm = torch.nn.LSTM(input_size=self.n_features,
                                    hidden_size=self.n_hidden,
                                    num_layers=self.n_layers,
                                    batch_first=True,
                                    dropout=0.1)
        # According to the pytorch docs, with batch_first=True the LSTM
        # output is (batch_size, seq_len, num_directions * hidden_size).
        self.l_linear = torch.nn.Linear(self.n_steps * self.batch_size * self.n_hidden,
                                        self.batch_size)

    def init_hidden(self, batch_size=1):
        """Reset the hidden and cell states to zeros.

        BUG FIX: the original version ignored ``batch_size`` and hard-coded
        1, so the stored state silently stopped matching the input whenever
        a caller passed anything else.  The default of 1 keeps the existing
        ``init_hidden(1)`` call sites working unchanged.
        (Even with batch_first=True the state shape stays
        (num_layers, batch, hidden_size), as per the docs.)
        """
        hidden_state = torch.zeros(self.n_layers, batch_size, self.n_hidden)
        cell_state = torch.zeros(self.n_layers, batch_size, self.n_hidden)
        self.hidden = (hidden_state, cell_state)

    def forward(self, x):
        # BUG FIX: detach the carried-over state before reuse.  Without this,
        # every forward call extends the autograd graph of all previous
        # iterations and backward() walks (or errors on) stale graphs --
        # the classic cause of exploding/NaN losses when the state is only
        # initialized once.  The commented-out detach_ in the original shows
        # this was the intent; doing it here, before the LSTM call, is safe.
        self.hidden = tuple(h.detach() for h in self.hidden)
        lstm_out, self.hidden = self.l_lstm(x, self.hidden)
        # lstm_out (with batch_first=True) is
        # (batch_size, seq_len, num_directions * hidden_size); flatten
        # everything but the leading batch dim of 1 for the linear head.
        lstm_out = lstm_out.reshape((1, self.n_steps * self.batch_size * self.n_hidden))
        lstm_out = self.l_linear(lstm_out)
        return lstm_out
没有每次迭代的初始化,代码看起来是这样的
# Training script fragment.
# NOTE(review): indentation was lost when this snippet was pasted, so the
# exact extent of the `for j in ...` loop body below is unknown -- presumably
# everything down to the loss print runs once per store/month iteration.
# The names Stores_train, Xy, Xy_temp_month and months_temp are defined
# elsewhere and are not visible here.
batch_size = 2
stores_drawn_idx = 2
# Slice out the next `batch_size` stores and advance the cursor.
Stores_train_batch = Stores_train.iloc[stores_drawn_idx:stores_drawn_idx+batch_size]
Stores_train_batch.reset_index(inplace=True, drop = True)
stores_drawn_idx += batch_size
Months = Xy['Month'].sort_values().unique()
n_steps = 3
mv_net = SmallLSTM(n_features = n_features,\
seq_len = seq_len, \
n_hidden=n_hidden, \
n_layers = n_layers, \
n_steps = n_steps, \
batch_size = batch_size)
# Hidden state is initialized ONCE here (batch dimension 1).
mv_net.init_hidden(1)
train_batch = pd.DataFrame(columns = Xy.columns)
for j in range(Stores_train_batch.shape[0]):
# Build the feature matrix: drop identifier/target columns, keep floats.
X_ = Xy_temp_month.drop(['origin_address', 'Retailer_origin', 'Month', 'target'], axis = 1).values.astype('float32')
# Target is the last month in the window -- TODO confirm months_temp[-1] semantics.
y_ = Xy_temp_month[Xy_temp_month['Month'] == months_temp[-1]]['target']
# Reshape to (1, seq_len, n_features): one sequence of batch dimension 1.
X_ = torch.from_numpy(X_.reshape(1,n_steps*batch_size,X_.shape[1]))
# NOTE(review): X_ is already a tensor; torch.tensor() here makes a redundant copy.
X_ = torch.tensor(X_,dtype=torch.float32)
y_ = torch.tensor(y_.values.astype('float32'), dtype=torch.float32).reshape([len(y_)])
# NOTE(review): these hyper-parameters are (re)assigned AFTER being used in
# the SmallLSTM(...) call above -- ordering looks scrambled by the paste.
n_features = 68
seq_len = n_steps*batch_size
n_hidden = 2 # number of hidden states
n_layers = 2 # number of LSTM layers (stacked)
criterion = torch.nn.MSELoss(reduction='sum')
# NOTE(review): weight_decay=1500 is extraordinarily large (typical values
# are ~1e-4); by itself this can drive the weights, and hence the loss, to
# extreme values -- worth confirming it is intentional.
optimizer = torch.optim.Adam(mv_net.parameters(), lr=0.05, weight_decay=1500)
valid_loss_min = 1000
#mv_net.init_hidden(1)
output = mv_net(X_)
loss = criterion(output, y_)
print(loss.item())
在这种情况下,每次运行 print(loss.item()) 这一行时,结果都是 nan。
而如果我加上 mv_net.init_hidden(1) 那一行(每次迭代都重新初始化),我就能得到正常的损失值。我该怎么办?谢谢。
目前没有回答
相关问题 更多 >
编程相关推荐