检索数据的ValueError DataFrame.d类型必须为int、float或bool

2024-05-16 10:38:46 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试使用多重输出回归使用trainxgboost运行Python代码,我得到了一个值错误。谢谢你的帮助

请找到我的数据样本

Layers  Model  Technique    Accuracy-1  Accuracy-2  Latency    time
18-27   Net     1           0.96         0.99       334368.0    0.99
38-37   MNet    1           0.76         0.99       313348.0    0.99

下面是我使用XGBoost的代码

def optimize(trial,x,y,regressor):
  max_depth = trial.suggest_int("max_depth",3,30)
  n_estimators = trial.suggest_int("n_estimators",100,3000)
  max_leaves= trial.suggest_int("max_leaves",1,10)
  colsample_bytree = trial.suggest_uniform('colsample_bytree', 0.0, 1.0) 
  gamma = trial.suggest_uniform('gamma', 0.0, 0.05)  
  min_child_weight = trial.suggest_uniform('min_child_weight',1,3)
  reg_lambda = trial.suggest_uniform('reg_lambda',0.5,1)
  model = xgb.XGBRegressor(
    objective ='reg:squarederror',
    n_estimators=n_estimators,
    max_depth=max_depth,
    learning_rate=learning_rate,
    colsample_bytree=colsample_bytree,
    gamma=gamma,
    min_child_weight=min_child_weight,
    reg_lambda=reg_lambda,
    max_leaves=max_leaves)
  kf=model_selection.KFold(n_splits=5)
  error=[]
  for idx in kf.split(X=x , y=y):
    train_idx , test_idx= idx[0],idx[1]
    xtrain=x[train_idx]
    ytrain=y[train_idx]
    xtest=x[test_idx]
    ytest=y[test_idx]   
    model.fit(x,y)
    y_pred = model.predict(xtest)
    fold_err = metrics.mean_squared_error(ytest,y_pred)
    error.append(fold_err)
  return np.mean(error)

def optimize_xgb(X,y):
  list_of_y = ["Target 1","Target 2", "Target 3","Target 4"]
  for i,m in zip(range(y.shape[1]),list_of_y):
    print("{} optimized Parameters on MSE Error".format(m))
    optimization_function = partial(optimize , x=X,y=y[:,i],regressor="random_forest")
    study = optuna.create_study(direction="minimize")
    study.optimize(optimization_function,n_trials=1)
data["Latency"] = minmax_scale(data["Latency"])  
X = data[["Layers ","Model"]]
Y = data[['Accuracy-1', 'Accuracy-2','Latency', 'time ']]
encoder = OneHotEncoder(sparse=False)
onehot = encoder.fit_transform(X)
X_encoded  = encoder.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
np.array(X_encoded), np.array(Y), test_size=0.3, random_state=42)
def modeling(X,y,max_depth=10,n_estimators=300,max_leaves=10,
             learning_rate=0.01,colsample_bytree=0.001,gamma=0.0001,min_child_weight=2,
             reg_lambda=0.3):
  
    model = xgb.XGBRegressor(objective='reg:squarederror',
                          n_estimators=n_estimators,
                          max_depth=max_depth,
                          max_leaves=max_leaves,
                          learning_rate=learning_rate,
                          gamma=gamma,
                          min_child_weight=min_child_weight,
                          colsample_bytree=colsample_bytree)
  if y.shape[1] ==1:
    print(" Apply Xgboost for one single Target....\n")
    model_xgb = model.fit(X, y)
  else:
    print(" Apply Xgboost for {} Targets....".format(y.shape[1]))
    model_xgb = MOR(model).fit(X, y)
  cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)
  scores = []
  for i in range(y.shape[1]):
    scores.append(np.abs(cross_val_score(model, X, y[:,i], scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)))
    print('Mean MSE of the {} target : {}  ({})'.format(i,scores[i].mean(), scores[i].std()) )
  return model_xgb
model_xgb = modeling(X_train,y_train,optimize="no")
y_estimated = model_xgb.predict(X_test)
mse(y_estimated,y_test)
################
y = np.random.random((1000,1))
model_xgb = modeling(X,y,optimize="no")

检索ValueError时出错:数据的DataFrame.d类型必须为int、float或bool。 不希望字段、图层、模型中出现数据类型


Tags: testmodeltrainminmaxleavessuggestdepth