无法解决错误消息“Expected 2D array, got 1D array instead”（预期为二维数组，实际得到的却是一维数组）？

import pandas as pd from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LassoLarsCV from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline, make_union from sklearn.preprocessing import Normalizer from tpot.builtins import StackingEstimator from sklearn.preprocessing import MinMaxScaler from sklearn import preprocessing y = data['y1'] x = data[['x1','x2','x3','x4','x5']] x.values.ravel() x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0) # Average CV score on the training set was: -0.1116338317020572 exported_pipeline = make_pipeline( Normalizer(norm="max"), StackingEstimator(estimator=LassoLarsCV(normalize=True)), RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100) ) exported_pipeline.fit(x_train, y_train) results = exported_pipeline.predict(y_test) print(np.mean(abs(y_test-results)))

--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-93-5e4ac0c63791> in <module> 28 29 exported_pipeline.fit(x_train, y_train) ---> 30 results = exported_pipeline.predict(y_test) 31 print(np.mean(abs(y_test-results))) ~/anaconda3/lib/python3.8/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs) 114 115 # lambda, but not partial, allows help() to work with update_wrapper --> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs) 117 # update the docstring of the returned function 118 update_wrapper(out, self.fn) ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params) 417 Xt = X 418 for _, name, transform in self._iter(with_final=False): --> 419 Xt = transform.transform(Xt) 420 return self.steps[-1][-1].predict(Xt, **predict_params) 421 ~/anaconda3/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in transform(self, X, copy) 1827 """ 1828 copy = copy if copy is not None else self.copy -> 1829 X = check_array(X, accept_sparse='csr') 1830 return normalize(X, norm=self.norm, axis=1, copy=copy) 1831 ~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator) 550 # If input is 1D raise error 551 if array.ndim == 1: --> 552 raise ValueError( 553 "Expected 2D array, got 1D array instead:\narray={}.\n" 554 "Reshape your data either using array.reshape(-1, 1) if " ValueError: Expected 2D array, got 1D array instead: array=[-0.54719445 0.01222733 0.89720391 ... -1.22633808 -0.19243653 -0.1420281 ]. Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

ValueError Traceback (most recent call last) <ipython-input-96-fb56ff22e193> in <module> 27 ) 28 ---> 29 exported_pipeline.fit(x_train.values.reshape(-1,1), y_train) 30 results = exported_pipeline.predict(y_test) 31 print(np.mean(abs(y_test-results))) ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params) 348 This estimator 349 """ --> 350 Xt, fit_params = self._fit(X, y, **fit_params) 351 with _print_elapsed_time('Pipeline', 352 self._log_message(len(self.steps) - 1)): ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params) 309 cloned_transformer = clone(transformer) 310 # Fit or load from cache the current transformer --> 311 X, fitted_transformer = fit_transform_one_cached( 312 cloned_transformer, X, y, None, 313 message_clsname='Pipeline', ~/.local/lib/python3.8/site-packages/joblib/memory.py in __call__(self, *args, **kwargs) 350 351 def __call__(self, *args, **kwargs): --> 352 return self.func(*args, **kwargs) 353 354 def call_and_shelve(self, *args, **kwargs): ~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params) 726 with _print_elapsed_time(message_clsname, message): 727 if hasattr(transformer, 'fit_transform'): --> 728 res = transformer.fit_transform(X, y, **fit_params) 729 else: 730 res = transformer.fit(X, y, **fit_params).transform(X) ~/anaconda3/lib/python3.8/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params) 572 else: 573 # fit method of arity 2 (supervised transformation) --> 574 return self.fit(X, y, **fit_params).transform(X) 575 576 ~/anaconda3/lib/python3.8/site-packages/tpot/builtins/stacking_estimator.py in fit(self, X, y, **fit_params) 65 Returns a copy of the estimator 66 """ ---> 67 self.estimator.fit(X, y, **fit_params) 68 return self 69 ~/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_least_angle.py in fit(self, X, y) 1378 returns an 
instance of self. 1379 """ -> 1380 X, y = check_X_y(X, y, y_numeric=True) 1381 X = as_float_array(X, copy=self.copy_X) 1382 y = as_float_array(y, copy=self.copy_X) ~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator) 763 y = y.astype(np.float64) 764 --> 765 check_consistent_length(X, y) 766 767 return X, y ~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays) 209 uniques = np.unique(lengths) 210 if len(uniques) > 1: --> 211 raise ValueError("Found input variables with inconsistent numbers of" 212 " samples: %r" % [int(l) for l in lengths]) 213 ValueError: Found input variables with inconsistent numbers of samples: [120498, 5738]

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0) x_train.values.reshape(-1,1) y_train.values.reshape(-1,1) # Average CV score on the training set was: -0.1116338317020572 exported_pipeline = make_pipeline( Normalizer(norm="max"), StackingEstimator(estimator=LassoLarsCV(normalize=True)), RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100) ) exported_pipeline.fit(x_train, y_train) results = exported_pipeline.predict(y_test) print(np.mean(abs(y_test-results)))

x.values.reshape(-1,1) y.values.reshape(-1,1) x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0) # Average CV score on the training set was: -0.1116338317020572 exported_pipeline = make_pipeline( Normalizer(norm="max"), StackingEstimator(estimator=LassoLarsCV(normalize=True)), RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100) ) exported_pipeline.fit(x_train, y_train) results = exported_pipeline.predict(y_test) print(np.mean(abs(y_test-results)))

1条回答

网友

1楼 · 发布于 2024-09-21 00:18:30

预测应当基于 x 值（特征）而不是 y 值（目标）。因此，我认为正确的那一行代码应该是：

results = exported_pipeline.predict(x_test)

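换句话说，你是用 x 来预测 y，而不是反过来。正如错误消息所示，传给 predict 的输入必须是一个二维数组（样本数 × 特征数）；而 y_test 是一维的，所以流水线中的 Normalizer 在 transform 时会抛出“Expected 2D array, got 1D array instead”。另外，无需对 x 或 y 调用 reshape：这些调用的返回值没有被赋值，不会改变原数据；而把多列特征用 reshape(-1,1) 展平成一列再传给 fit，会使 x 与 y 的样本数不一致，也就是第二个报错的原因。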

相关问题更多 >

编程相关推荐

热门问题

热门文章