numpy数据帧数据阵列

2024-04-20 02:17:47 发布

您现在位置:Python中文网/ 问答频道 /正文

在下面的最小工作示例中,我使用伪数据(输入的形状)说明了如何创建自动编码器网络,以及numpyndarray进行预测所面临的问题

import numpy as np
import pandas as pd
import random
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error
class SingleEncoder:

    def __init__(self, train, test):
        self.x_train = train
        self.x_test = test
        self.first_dim = 1
        self.second_dim = 100
        self.channels = 4
        self.input_dim = (self.first_dim, self.second_dim, 
               self.channels) #(1, 100, 4)

    def buildModel(self):
        input_layer = self.input_dim
        autoencoder = Sequential()
        activ='relu'

        # encoder
        autoencoder.add(Dense(200,  activation='relu', input_shape=input_layer)) 
        autoencoder.add(Dense(100,  activation='relu')) 
        autoencoder.add(Dense(80,  activation='linear'))   

        #decoder
        autoencoder.add(Dense(80, activation='linear'))  
        autoencoder.add(Dense(100, activation='relu')) 
        autoencoder.add(Dense(200, activation='relu'))
        autoencoder.add(Dense(self.channels, activation='relu'))

        autoencoder.compile(optimizer='adam', loss='mae',  
metrics=['mean_squared_error'])
        autoencoder.summary()

        filepath = "weights.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='loss',
 verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]

        autoencoder.fit(self.x_train, self.x_train, epochs=10, batch_size=32, 
          shuffle=True,callbacks=callbacks_list)

        return autoencoder

#

网络丢失:

def LosScore2(x_pred, x_test):
    mse = []
    for i in range(len(x_test)):
        mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
    return mse

生成假数据:

#Generate sample data, the shape of expected input
# X: ND-Array containing fixed-length segments of shape (1,100,4)
X = np.random.randn(2000, 1, 100, 4)
a,b,c = np.repeat(0, 700), np.repeat(1, 700), np.repeat(2, 600)
y = np.hstack((a,b,c))

from sklearn.model_selection import  train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=7)
LABELS= list(set(np.ndarray.flatten(y_train)))

培训网络:

models = []
for i in range(len(LABELS)):
  print(LABELS[i])
  sub_train = x_train[y_train == i]
  sub_test = x_test[y_test == i]
  
  autoencoder = SingleEncoder(sub_train, sub_test)
  autoencoder = autoencoder.buildModel()
  models.append(autoencoder)

这个MWE工作得很好,但是当我尝试对测试集求值时,由于输入形状,我遇到了一个错误,如下所示:

print("Evaluating on test set -> ")
x_pred = []
# for each model
 # predition
for e in range(len(models)):
    x_pred.append(models[e].predict(x_test))

scored0 = (LosScore2(x_pred[0], x_test))
scored1 = (LosScore2(x_pred[1], x_test))
scored2 = (LosScore2(x_pred[2], x_test))

下面是错误:

Evaluating on test set -> 

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-11-989c507cfd63> in <module>()
      9 #scored0 = (LosScore2(x_pred[0][np.newaxis], x_test))
     10 
---> 11 scored0 = (LosScore2(x_pred[0], x_test))
     12 scored1 = (LosScore2(x_pred[1], x_test))
     13 scored2 = (LosScore2(x_pred[2], x_test))

3 frames

<ipython-input-9-356737ea1f97> in LosScore2(x_pred, x_test)
      2     mse = []
      3     for i in range(len(x_test)):
----> 4         mse.append(mean_squared_error(pd.DataFrame(x_pred[i]), pd.DataFrame(x_test[i])))
      5     return mse

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    462                 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
    463             else:
--> 464                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
    465 
    466         # For data is list-like, or Iterable (will consume into list)

/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
    167     # by definition an array here
    168     # the dtypes will be coerced to a single dtype
--> 169     values = prep_ndarray(values, copy=copy)
    170 
    171     if dtype is not None:

/usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
    293         values = values.reshape((values.shape[0], 1))
    294     elif values.ndim != 2:
--> 295         raise ValueError("Must pass 2-d input")
    296 
    297     return values

ValueError: Must pass 2-d input

我知道这与测试数据形状有关,但我不知道如何修复它


Tags: intestimportselfaddinputnptrain
2条回答

您是否尝试过:

scored0 = (LosScore2(x_pred, x_test))

而不是:

scored0 = (LosScore2(x_pred[0], x_test))

如您所见,在以下几行中:

usr/local/lib/python3.6/dist-packages/pandas/core/internals/construction.py in prep_ndarray(values, copy)
    293         values = values.reshape((values.shape[0], 1))
    294     elif values.ndim != 2:
 > 295         raise ValueError("Must pass 2-d input")
    296 
    297     return values

正在进行重塑,并且正在选择值的第一部分

我想可能是这样

您正在使用sklearn mse和4D输入将它们转换为熊猫数据帧,这将生成一系列错误。我建议您保持简单的numpy格式,以计算每个样本的mse:

def LosScore2(x_pred, x_test):
    mse = []
    for i in range(len(x_test)):
        mse.append(np.mean(np.square(x_test[i]-x_pred[i])))
    return mse

这里是跑步笔记本:https://colab.research.google.com/drive/1OWdTYuIVeIWyMp477DoNNNKJ3ZXSoDji?usp=sharing

相关问题 更多 >