在下面的最小工作示例中,我使用伪数据(输入的形状)说明了如何创建自动编码器网络,以及numpy
ndarray进行预测所面临的问题
import numpy as np
import pandas as pd
import random
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error
class SingleEncoder:
def __init__(self, train, test):
self.x_train = train
self.x_test = test
self.first_dim = 1
self.second_dim = 100
self.channels = 4
self.input_dim = (self.first_dim, self.second_dim,
self.channels) #(1, 100, 4)
def buildModel(self):
input_layer = self.input_dim
autoencoder = Sequential()
activ='relu'
# encoder
autoencoder.add(Dense(200, activation='relu', input_shape=input_layer))
autoencoder.add(Dense(100, activation='relu'))
autoencoder.add(Dense(80, activation='linear'))
#decoder
autoencoder.add(Dense(80, activation='linear'))
autoencoder.add(Dense(100, activation='relu'))
autoencoder.add(Dense(200, activation='relu'))
autoencoder.add(Dense(self.channels, activation='relu'))
autoencoder.compile(optimizer='adam', loss='mae',
metrics=['mean_squared_error'])
autoencoder.summary()
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
autoencoder.fit(self.x_train, self.x_train, epochs=10, batch_size=32,
shuffle=True,callbacks=callbacks_list)
return autoencoder
#
网络丢失:
def LosScore2(x_pred, x_test):
mse = []
for i in range(len(x_test)):
mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
return mse
生成假数据:
#Generate sample data, the shape of expected input
# X: ND-Array containing fixed-length segments of shape (1,100,4)
X = np.random.randn(2000, 1, 100, 4)
a,b,c = np.repeat(0, 700), np.repeat(1, 700), np.repeat(2, 600)
y = np.hstack((a,b,c))
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=7)
LABELS= list(set(np.ndarray.flatten(y_train)))
培训网络:
models = []
for i in range(len(LABELS)):
print(LABELS[i])
sub_train = x_train[y_train == i]
sub_test = x_test[y_test == i]
autoencoder = SingleEncoder(sub_train, sub_test)
autoencoder = autoencoder.buildModel()
models.append(autoencoder)
这个MWE
工作得很好,但是当我尝试对测试集求值时,由于输入形状,我遇到了一个错误,如下所示:
print("Evaluating on test set -> ")
x_pred = []
# for each model
# predition
for e in range(len(models)):
x_pred.append(models[e].predict(x_test))
scored0 = (LosScore2(x_pred[0], x_test))
scored1 = (LosScore2(x_pred[1], x_test))
scored2 = (LosScore2(x_pred[2], x_test))
下面是错误:
Evaluating on test set ->
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-989c507cfd63> in <module>()
9 #scored0 = (LosScore2(x_pred[0][np.newaxis], x_test))
10
---> 11 scored0 = (LosScore2(x_pred[0], x_test))
12 scored1 = (LosScore2(x_pred[1], x_test))
13 scored2 = (LosScore2(x_pred[2], x_test))
3 frames
<ipython-input-9-356737ea1f97> in LosScore2(x_pred, x_test)
2 mse = []
3 for i in range(len(x_test)):
----> 4 mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
5 return mse
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
462 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
463 else:
--> 464 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
465
466 # For data is list-like, or Iterable (will consume into list)
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
167 # by definition an array here
168 # the dtypes will be coerced to a single dtype
--> 169 values = prep_ndarray(values, copy=copy)
170
171 if dtype is not None:
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
293 values = values.reshape((values.shape[0], 1))
294 elif values.ndim != 2:
--> 295 raise ValueError("Must pass 2-d input")
296
297 return values
ValueError: Must pass 2-d input
我知道这与测试数据形状有关,但我不知道如何修复它
您是否尝试过:
而不是:
如您所见,在以下几行中:
正在进行重塑,并且正在选择值的第一部分
我想可能是这样
您正在使用sklearn mse和4D输入将它们转换为熊猫数据帧,这将生成一系列错误。我建议您保持简单的numpy格式,以计算每个样本的mse:
这里是跑步笔记本:https://colab.research.google.com/drive/1OWdTYuIVeIWyMp477DoNNNKJ3ZXSoDji?usp=sharing
相关问题 更多 >
编程相关推荐