我想知道:在 scikit-learn 的 Pipeline 中使用 OneHotEncoder 时,它是否会删除原始的分类列?因为我在随机森林算法中看不到任何与此相关的信息。
#%%
# Split the columns of X_train2 by dtype: float64 columns go to
# numerical_features, every other dtype to categorical_features.
# Both lists are sorted by column name.
numerical_features = list(X_train2.select_dtypes(include=['float64']).columns)
numerical_features.sort()
categorical_features = list(X_train2.select_dtypes(exclude=['float64']).columns)
categorical_features.sort()
#%%
# Wrap y_train in a DataFrame and count how often each value occurs in the
# column labelled 0.
# NOTE(review): the name suggests these are the labels after random
# oversampling -- confirm against where y_train is produced.
values_after_ros = pd.DataFrame(data=y_train)
values_after_ros1 = values_after_ros[0].value_counts()
#%%
# Preprocessing step: standardize the numerical columns and one-hot encode the
# categorical ones (handle_unknown='ignore' is passed to the encoder).
# make_column_transformer is called without `remainder`, so its documented
# default ('drop') applies: only the transformers' outputs are passed on, and
# the raw categorical columns are replaced by their encoded columns.
preprocess = make_column_transformer(
    (StandardScaler(), numerical_features),
    (OneHotEncoder(handle_unknown='ignore'), categorical_features),
)

# Full pipeline: preprocessing followed by a random forest classifier.
model = make_pipeline(preprocess, RandomForestClassifier(verbose=250))
print(model)
#%%
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
# Multi-metric scoring dictionary: one F1 scorer per class label 0..6 under the
# keys 'f11'..'f17', followed by a micro-averaged F1 under 'refit_score'.
f1 = {
    'f1' + str(label + 1): make_scorer(f1_score, average=None, labels=[label])
    for label in range(7)
}
f1['refit_score'] = make_scorer(f1_score, average='micro')

# No hyperparameters are listed for the search.
param_grid = {}
#%%
# Randomized search over `param_grid` for the pipeline: a single sampled
# candidate (n_iter=1), 2-fold cross-validation, scored with every metric in
# `f1`, and refit using the 'refit_score' metric.
grid_clf = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=1,
    scoring=f1,
    refit='refit_score',
    cv=2,
    verbose=250,
    random_state=42,
)
#%%
# Run the search on the training data ros_xtrain / ros_train_y.
# NOTE(review): the feature-name lists were built from X_train2's columns;
# confirm ros_xtrain carries the same column names, otherwise the column
# transformer cannot select columns by name.
grid_clf.fit(ros_xtrain, ros_train_y)
#%%
OneHotEncoder(OHE)在转换后不会输出原始列,只会输出编码后的列。下面是一个例子。
输出
相关问题 更多 >
编程相关推荐