我正在尝试使用多重输出回归使用trainxgboost运行Python代码,我得到了一个值错误。谢谢你的帮助
请找到我的数据样本
Layers Model Technique Accuracy-1 Accuracy-2 Latency time
18-27 Net 1 0.96 0.99 334368.0 0.99
38-37 MNet 1 0.76 0.99 313348.0 0.99
下面是我使用XGBoost的代码
def optimize(trial,x,y,regressor):
max_depth = trial.suggest_int("max_depth",3,30)
n_estimators = trial.suggest_int("n_estimators",100,3000)
max_leaves= trial.suggest_int("max_leaves",1,10)
colsample_bytree = trial.suggest_uniform('colsample_bytree', 0.0, 1.0)
gamma = trial.suggest_uniform('gamma', 0.0, 0.05)
min_child_weight = trial.suggest_uniform('min_child_weight',1,3)
reg_lambda = trial.suggest_uniform('reg_lambda',0.5,1)
model = xgb.XGBRegressor(
objective ='reg:squarederror',
n_estimators=n_estimators,
max_depth=max_depth,
learning_rate=learning_rate,
colsample_bytree=colsample_bytree,
gamma=gamma,
min_child_weight=min_child_weight,
reg_lambda=reg_lambda,
max_leaves=max_leaves)
kf=model_selection.KFold(n_splits=5)
error=[]
for idx in kf.split(X=x , y=y):
train_idx , test_idx= idx[0],idx[1]
xtrain=x[train_idx]
ytrain=y[train_idx]
xtest=x[test_idx]
ytest=y[test_idx]
model.fit(x,y)
y_pred = model.predict(xtest)
fold_err = metrics.mean_squared_error(ytest,y_pred)
error.append(fold_err)
return np.mean(error)
def optimize_xgb(X,y):
list_of_y = ["Target 1","Target 2", "Target 3","Target 4"]
for i,m in zip(range(y.shape[1]),list_of_y):
print("{} optimized Parameters on MSE Error".format(m))
optimization_function = partial(optimize , x=X,y=y[:,i],regressor="random_forest")
study = optuna.create_study(direction="minimize")
study.optimize(optimization_function,n_trials=1)
data["Latency"] = minmax_scale(data["Latency"])
X = data[["Layers ","Model"]]
Y = data[['Accuracy-1', 'Accuracy-2','Latency', 'time ']]
encoder = OneHotEncoder(sparse=False)
onehot = encoder.fit_transform(X)
X_encoded = encoder.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
np.array(X_encoded), np.array(Y), test_size=0.3, random_state=42)
def modeling(X,y,max_depth=10,n_estimators=300,max_leaves=10,
learning_rate=0.01,colsample_bytree=0.001,gamma=0.0001,min_child_weight=2,
reg_lambda=0.3):
model = xgb.XGBRegressor(objective='reg:squarederror',
n_estimators=n_estimators,
max_depth=max_depth,
max_leaves=max_leaves,
learning_rate=learning_rate,
gamma=gamma,
min_child_weight=min_child_weight,
colsample_bytree=colsample_bytree)
if y.shape[1] ==1:
print(" Apply Xgboost for one single Target....\n")
model_xgb = model.fit(X, y)
else:
print(" Apply Xgboost for {} Targets....".format(y.shape[1]))
model_xgb = MOR(model).fit(X, y)
cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)
scores = []
for i in range(y.shape[1]):
scores.append(np.abs(cross_val_score(model, X, y[:,i], scoring='neg_mean_squared_error', cv=cv, n_jobs=-1)))
print('Mean MSE of the {} target : {} ({})'.format(i,scores[i].mean(), scores[i].std()) )
return model_xgb
model_xgb = modeling(X_train,y_train,optimize="no")
y_estimated = model_xgb.predict(X_test)
mse(y_estimated,y_test)
################
y = np.random.random((1000,1))
model_xgb = modeling(X,y,optimize="no")
检索ValueError时出错:数据的DataFrame.d类型必须为int、float或bool。 不希望字段、图层、模型中出现数据类型
目前没有回答
相关问题 更多 >
编程相关推荐