使用 imbalanced-learn（imblearn）过采样后，如何输出训练集的类别分布（形状）

from collections import Counter


def oversample(x_values, y_values, oversampler=None):
    """Oversample the training data and print the class counts before/after.

    Args:
        x_values: Feature matrix handed to the sampler's ``fit_resample``.
        y_values: Target labels handed to the sampler's ``fit_resample``.
        oversampler: Optional object exposing ``fit_resample(X, y)``.
            When omitted, ``SMOTE(random_state=42, n_jobs=-1)`` is used.

    Returns:
        The ``(x_oversampled, y_oversampled)`` pair produced by the sampler.
    """
    if oversampler is None:
        # Imported lazily: imbalanced-learn is only required for the default
        # sampler, not when the caller supplies their own.
        from imblearn.over_sampling import SMOTE

        # NOTE(review): `n_jobs` appears to be deprecated in recent
        # imbalanced-learn releases -- confirm against the installed version.
        oversampler = SMOTE(random_state=42, n_jobs=-1)

    x_oversampled, y_oversampled = oversampler.fit_resample(x_values, y_values)
    print(
        "Oversampling training set from {0} to {1} using {2}".format(
            dict(Counter(y_values)),
            dict(Counter(y_oversampled)),
            # Report the sampler's class name (e.g. "SMOTE").
            type(oversampler).__name__,
        )
    )
    return x_oversampled, y_oversampled

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler

# Chain scaling, SMOTE oversampling and the classifier into one pipeline.
# NOTE(review): `n_jobs` appears to be deprecated in recent imbalanced-learn
# releases -- confirm against the installed version.
pipe = Pipeline(
    [
        ('scaler', MinMaxScaler()),
        ('sampler', SMOTE(random_state=42, n_jobs=-1)),
        ('estimator', RandomForestClassifier()),
    ]
)

# x_values / y_values are not defined in this snippet; the surrounding code
# must provide the training features and labels.
pipe.fit(x_values, y_values)

1条回答

网友

1楼 · 发布于 2024-05-29 09:45:26

理论上是可以的。过采样器被拟合（fit）之后，会创建一个属性 `sampling_strategy_`，其中记录了调用 `fit_resample` 时要为少数类生成的样本数。您可以利用它得到与上面示例类似的输出。以下是基于您的代码修改的示例：

# Imports
from collections import Counter
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE    
from imblearn.pipeline import Pipeline

# Build a small imbalanced toy dataset and record its class counts
X, y = make_classification(weights=[0.20, 0.80], random_state=0)
init_class_distribution = Counter(y)
# most_common() orders entries by descending count, so the last entry
# belongs to the least frequent (minority) class.
min_class_label = init_class_distribution.most_common()[-1][0]
print(f'Initial class distribution: {dict(init_class_distribution)}')

# Assemble the scaler -> sampler -> estimator pipeline and fit it
# NOTE(review): `n_jobs` appears to be deprecated in recent imbalanced-learn
# releases -- confirm against the installed version.
pipe = Pipeline(
    [
        ('scaler', MinMaxScaler()),
        ('sampler', SMOTE(random_state=42, n_jobs=-1)),
        ('estimator', RandomForestClassifier(random_state=23)),
    ]
)
pipe.fit(X, y)

# After fitting, the sampler step carries `sampling_strategy_`; look up the
# entry for the minority class to get the expected number of new samples.
fitted_sampler = pipe.named_steps['sampler']
sampling_strategy = fitted_sampler.sampling_strategy_
expected_n_samples = sampling_strategy.get(min_class_label)
print(f'Expected number of generated samples: {expected_n_samples}')

# Fit and resample with a pipeline made of every step except the final
# estimator, so the resampled data itself can be inspected.
sampler_pipe = Pipeline(pipe.steps[:-1])
X_res, y_res = sampler_pipe.fit_resample(X, y)
actual_class_distribution = Counter(y_res)
# Printed as a plain dict to match the "Initial class distribution" line.
print(f'Actual class distribution: {dict(actual_class_distribution)}')

相关问题更多 >

编程相关推荐

热门问题

热门文章