我不知道该怎么做才能让这个模型正常工作。错误信息提示需要重塑(reshape)数据,我已经照做了,却又得到了一个样本数量不一致的错误。我不知道这是怎么发生的。我运行过其他模型,但我不明白为什么现在会发生这种情况。
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoLarsCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import Normalizer
from tpot.builtins import StackingEstimator
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
# Reproduction script from the question: fit a TPOT-exported pipeline on a
# training split and print the mean absolute error on the held-out split.
# NOTE(review): `data` is not defined in this snippet -- presumably a DataFrame
# loaded earlier in the notebook; confirm.
y = data['y1']
x = data[['x1','x2','x3','x4','x5']]
# No-op: the array returned by ravel() is discarded, so `x` is left unchanged.
x.values.ravel()
# Hold out 20% of the rows for evaluation; random_state=0 makes the split repeatable.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0)
# Average CV score on the training set was: -0.1116338317020572
exported_pipeline = make_pipeline(
Normalizer(norm="max"),
StackingEstimator(estimator=LassoLarsCV(normalize=True)),
RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100)
)
exported_pipeline.fit(x_train, y_train)
# BUG: predict() is handed the target column `y_test` instead of the feature
# matrix `x_test`. `y_test` is 1-D, which is what triggers
# "ValueError: Expected 2D array, got 1D array instead" in the first pipeline
# step. The call should be exported_pipeline.predict(x_test).
results = exported_pipeline.predict(y_test)
# Mean absolute error between the true targets and the predictions.
# NOTE(review): `np` is not imported in the visible snippet -- presumably
# `import numpy as np` ran earlier in the notebook; confirm.
print(np.mean(abs(y_test-results)))
这个代码给了我这个错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-93-5e4ac0c63791> in <module>
28
29 exported_pipeline.fit(x_train, y_train)
---> 30 results = exported_pipeline.predict(y_test)
31 print(np.mean(abs(y_test-results)))
~/anaconda3/lib/python3.8/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
417 Xt = X
418 for _, name, transform in self._iter(with_final=False):
--> 419 Xt = transform.transform(Xt)
420 return self.steps[-1][-1].predict(Xt, **predict_params)
421
~/anaconda3/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in transform(self, X, copy)
1827 """
1828 copy = copy if copy is not None else self.copy
-> 1829 X = check_array(X, accept_sparse='csr')
1830 return normalize(X, norm=self.norm, axis=1, copy=copy)
1831
~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
550 # If input is 1D raise error
551 if array.ndim == 1:
--> 552 raise ValueError(
553 "Expected 2D array, got 1D array instead:\narray={}.\n"
554 "Reshape your data either using array.reshape(-1, 1) if "
ValueError: Expected 2D array, got 1D array instead:
array=[-0.54719445 0.01222733 0.89720391 ... -1.22633808 -0.19243653
-0.1420281 ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
我已经尝试了几种重塑数组的写法。根据我在 Stack Overflow 上读到的内容,应该使用 reshape(-1,1),但它们似乎都不起作用。我先在 fit 调用处添加了 values.reshape:
# Second attempt from the question: reshape the training features before fitting.
# BUG: reshape(-1, 1) places every cell of x_train in its own row, so X no
# longer has one row per entry of y_train. fit() then fails with
# "Found input variables with inconsistent numbers of samples: [120498, 5738]".
# x_train is already 2-D and should be passed to fit() unchanged.
exported_pipeline.fit(x_train.values.reshape(-1,1), y_train)
# BUG (still present): predict() must receive the features `x_test`, not the
# target `y_test`.
results = exported_pipeline.predict(y_test)
# Mean absolute error between the true targets and the predictions.
print(np.mean(abs(y_test-results)))
结果就是这个错误
ValueError Traceback (most recent call last)
<ipython-input-96-fb56ff22e193> in <module>
27 )
28
---> 29 exported_pipeline.fit(x_train.values.reshape(-1,1), y_train)
30 results = exported_pipeline.predict(y_test)
31 print(np.mean(abs(y_test-results)))
~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
348 This estimator
349 """
--> 350 Xt, fit_params = self._fit(X, y, **fit_params)
351 with _print_elapsed_time('Pipeline',
352 self._log_message(len(self.steps) - 1)):
~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
309 cloned_transformer = clone(transformer)
310 # Fit or load from cache the current transformer
--> 311 X, fitted_transformer = fit_transform_one_cached(
312 cloned_transformer, X, y, None,
313 message_clsname='Pipeline',
~/.local/lib/python3.8/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)
~/anaconda3/lib/python3.8/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
572 else:
573 # fit method of arity 2 (supervised transformation)
--> 574 return self.fit(X, y, **fit_params).transform(X)
575
576
~/anaconda3/lib/python3.8/site-packages/tpot/builtins/stacking_estimator.py in fit(self, X, y, **fit_params)
65 Returns a copy of the estimator
66 """
---> 67 self.estimator.fit(X, y, **fit_params)
68 return self
69
~/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_least_angle.py in fit(self, X, y)
1378 returns an instance of self.
1379 """
-> 1380 X, y = check_X_y(X, y, y_numeric=True)
1381 X = as_float_array(X, copy=self.copy_X)
1382 y = as_float_array(y, copy=self.copy_X)
~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
763 y = y.astype(np.float64)
764
--> 765 check_consistent_length(X, y)
766
767 return X, y
~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
209 uniques = np.unique(lengths)
210 if len(uniques) > 1:
--> 211 raise ValueError("Found input variables with inconsistent numbers of"
212 " samples: %r" % [int(l) for l in lengths])
213
ValueError: Found input variables with inconsistent numbers of samples: [120498, 5738]
然后我对这两个变量都加上了 .values.reshape(-1,1),结果又得到了“Expected 2D array, got 1D array”(期望二维数组,却得到一维数组)的错误:
# Third attempt from the question: reshape both training arrays after the split.
# Hold out 20% of the rows for evaluation; random_state=0 makes the split repeatable.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0)
# No-ops: reshape() returns a new array and neither result is assigned, so
# x_train and y_train are exactly what train_test_split returned.
x_train.values.reshape(-1,1)
y_train.values.reshape(-1,1)
# Average CV score on the training set was: -0.1116338317020572
exported_pipeline = make_pipeline(
Normalizer(norm="max"),
StackingEstimator(estimator=LassoLarsCV(normalize=True)),
RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100)
)
exported_pipeline.fit(x_train, y_train)
# BUG: predict() is handed the 1-D target `y_test` instead of the feature
# matrix `x_test`, so the "Expected 2D array, got 1D array" error comes back.
# The call should be exported_pipeline.predict(x_test).
results = exported_pipeline.predict(y_test)
# Mean absolute error between the true targets and the predictions.
print(np.mean(abs(y_test-results)))
我试过这个
# Fourth attempt from the question: reshape the full arrays before splitting.
# No-ops: reshape() returns a new array and neither result is assigned, so
# `x` and `y` reach train_test_split unchanged.
x.values.reshape(-1,1)
y.values.reshape(-1,1)
# Hold out 20% of the rows for evaluation; random_state=0 makes the split repeatable.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0)
# Average CV score on the training set was: -0.1116338317020572
exported_pipeline = make_pipeline(
Normalizer(norm="max"),
StackingEstimator(estimator=LassoLarsCV(normalize=True)),
RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100)
)
exported_pipeline.fit(x_train, y_train)
# BUG: predict() is handed the target `y_test` instead of the feature matrix
# `x_test`; the call should be exported_pipeline.predict(x_test).
results = exported_pipeline.predict(y_test)
# Mean absolute error between the true targets and the predictions.
print(np.mean(abs(y_test-results)))
非常感谢你的帮助。我实在不明白为什么会发生这种情况,所以非常感谢任何帮助或评论。该模型由 TPOT 生成。
预测通常基于x值而不是y值。因此,我认为正确的代码行应该是:
results = exported_pipeline.predict(x_test)
换句话说,你是从x预测y,而不是相反。正如错误消息所示,x通常是一个二维数组。
相关问题 更多 >
编程相关推荐