我对Keras和深度学习都是新手。我试图训练一个LSTM自动编码器,将由120个特征和大约28K个时间戳组成的多变量时间序列压缩成一个潜在的表示形式,我可以在以后的监督学习中使用它。在这个过程中,我遇到了一个数据基数错误。我理解这是输入数据大小的错误。但我不知道如何解决这个问题。我的tensorflow版本是2.3.0
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import optimizers, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Dense, LSTM, RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(tf.__version__)
def generate_datasets_for_training(data, window_size,scale=True, scaler_type=StandardScaler):
_l = len(data)
data = scaler_type().fit_transform(data)
Xs = []
Ys = []
for i in range(0, (_l - window_size)):
# because this is an autoencoder - our Ys are the same as our Xs. No need to pull the next sequence of values
Xs.append(data[i:i+window_size])
Ys.append(data[i:i+window_size])
tr_x, tr_y, ts_x, ts_y = [np.array(x) for x in train_test_split(Xs, Ys)]
assert tr_x.shape[2] == ts_x.shape[2] == (data.shape[1] if (type(data) == np.ndarray) else len(data))
return (tr_x.shape[2], tr_x, tr_y, ts_x, ts_y)
def plot_history(history):
plt.figure(figsize=(15, 5))
ax = plt.subplot(1, 2, 1)
plt.plot(history.history["loss"])
plt.title("Train loss")
ax = plt.subplot(1, 2, 2)
plt.plot(history.history["val_loss"])
plt.title("Test loss")
sample_data = pd.read_csv("sample_data.csv")
sample_data = sample_data.drop('time', axis=1)
epochs = 100
batch_size = 32
window_length = 4
early_stop = tf.keras.callbacks.EarlyStopping(
monitor='val_loss', min_delta=1e-2, patience=5, verbose=0, mode='auto',
baseline=None, restore_best_weights=True)
feats, X, Y, XX, YY = generate_datasets_for_training(sample_data, 4)
print('feats:' + str(feats))
print("X shape: " + str(X.shape))
print("Y shape: " + str(Y.shape))
print("XX shape: " + str(XX.shape))
print("YY shape: " + str(YY.shape))
model = keras.Sequential()
model.add(keras.layers.LSTM(64, kernel_initializer='he_uniform', batch_input_shape=(None, window_length, feats), return_sequences=True, name='encoder_1'))
model.add(keras.layers.LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='encoder_2'))
model.add(keras.layers.LSTM(16, kernel_initializer='he_uniform', return_sequences=False, name='encoder_3'))
model.add(keras.layers.RepeatVector(window_length, name='encoder_decoder_bridge'))
model.add(keras.layers.LSTM(16, kernel_initializer='he_uniform', return_sequences=True, name='decoder_1'))
model.add(keras.layers.LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='decoder_2'))
model.add(keras.layers.LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='decoder_3'))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(feats)))
model.compile(loss="mse",optimizer='adam')
model.build()
print(model.summary())
history = model.fit(x=X, y=Y, validation_data=(XX, YY), epochs=100, batch_size=batch_size, shuffle=True, callbacks=[early_stop])
print(history)
plot_history(history)
这是我运行此操作时遇到的错误:
ValueError: Data cardinality is ambiguous:
x sizes: 21596
y sizes: 7199
Please provide data which shares the same first dimension.
当我打印出输入数据的形状时,我得到的是:
X shape: (21596, 4, 119)
Y shape: (7199, 4, 119)
XX shape: (21596, 4, 119)
YY shape: (7199, 4, 119)
感谢所有能帮忙的人,非常感谢!:)
我认为这是您的
model.fit()
中的一个错误,最好尝试以下方法:您必须在训练和验证数据集中获得相同数量的样本
相关问题 更多 >
编程相关推荐