尽管设置了 random_state 和 seed，仍无法用持久化的 XGBClassifier 重现概率分数

XGBClassifier(base_score=0.5, booster='gbtree',colsample_bylevel=1, colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_delta_step=0, max_depth=4, min_child_weight=1, missing=nan, n_estimators=1500, n_jobs=-1, nthread=None, objective='binary:logistic', random_state=777, reg_alpha=2, reg_lambda=1, scale_pos_weight=0.971637216356233, seed=777, silent=True, subsample=0.6, verbose=2)

def save_as_pickled_object(obj, filepath):
    """Pickle *obj* to *filepath*, writing the payload in bounded chunks.

    This is a defensive way to write a pickle, intended to allow very
    large files on all platforms: the serialized bytes are written in
    pieces of at most ``2**31 - 1`` bytes instead of one single write.

    Args:
        obj: Any picklable object.
        filepath: Destination path; the file is created or overwritten.

    Raises:
        pickle.PicklingError: If *obj* cannot be pickled.
        OSError: If the file cannot be opened or written.
    """
    import pickle

    max_bytes = 2**31 - 1
    # Protocol 4 allows serializing data greater than 4GB. Reference:
    # https://stackoverflow.com/questions/29704139/pickle-in-python3-doesnt-work-for-large-data-saving
    bytes_out = pickle.dumps(obj, protocol=4)
    # len() is the exact payload size; sys.getsizeof() would also count the
    # bytes object's own header and overstate the amount of data to write.
    n_bytes = len(bytes_out)
    with open(filepath, 'wb') as f_out:
        for idx in range(0, n_bytes, max_bytes):
            f_out.write(bytes_out[idx:idx + max_bytes])


def try_to_load_as_pickled_object_or_None(filepath):
    """Load a pickled object from *filepath*, or return ``None`` on failure.

    This is the defensive counterpart of a plain ``pickle.load``: the file
    is read in pieces of at most ``2**31 - 1`` bytes and then unpickled
    from the reassembled buffer.

    Args:
        filepath: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` if the file could not be read or
        unpickled. Note that a successfully stored ``None`` is
        indistinguishable from a failure.
    """
    import os
    import pickle

    max_bytes = 2**31 - 1
    try:
        input_size = os.path.getsize(filepath)
        bytes_in = bytearray(0)
        with open(filepath, 'rb') as f_in:
            for _ in range(0, input_size, max_bytes):
                bytes_in += f_in.read(max_bytes)
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file; only load pickles that come from a trusted source.
        obj = pickle.loads(bytes_in)
    except Exception:
        # Deliberately best-effort: any read/unpickle error yields None, but
        # KeyboardInterrupt and SystemExit are allowed to propagate.
        return None
    return obj

1条回答

网友

1楼 · 发布于 2024-04-26 11:35:32

我猜，可能是您转储和读取模型的方式有些问题，或者是您所用的 xgboost 版本存在某种特殊行为。

我在笔记本中使用以下代码，只需加载持久化的 XGB 模型（重启内核后，重新执行初始导入和“加载模型”部分），就能完全重现预测的概率：

import os
import numpy as np
import pandas as pd
import pickle
import joblib
import xgboost as xgb


## Training a model
# Seed NumPy's global RNG so the synthetic data below are the same on each run.
np.random.seed(312)
train_X = np.random.random((10000, 10))
train_y = np.random.randint(0, 2, train_X.shape[0])
val_X = np.random.random((10000, 10))
# Size the validation labels from the validation features they belong to.
val_y = np.random.randint(0, 2, val_X.shape[0])

xgb_model_mpg = xgb.XGBClassifier(max_depth=3)
_ = xgb_model_mpg.fit(train_X, train_y)
# Reference probabilities from the in-memory model, for comparison below.
print(xgb_model_mpg.predict_proba(val_X))


## Save the model
# Persist the same fitted model twice: once with pickle, once with joblib.
with open('m.pkl', 'wb') as fout:
    pickle.dump(xgb_model_mpg, fout)

joblib.dump(xgb_model_mpg, 'm.jlib')


## Load the model
m_jlb = joblib.load('m.jlib')
# Use a context manager so the pickle file handle is closed after loading.
with open('m.pkl', 'rb') as fin:
    m_pkl = pickle.load(fin)

# Probabilities from both reloaded models, to compare with the output above.
print(m_jlb.predict_proba(val_X))
print(m_pkl.predict_proba(val_X))

我是在 Python 3.5.5 的普通 Jupyter 笔记本中，使用 xgboost 0.71 和 joblib 0.12.4 运行上述代码的。

相关问题更多 >

编程相关推荐

热门问题

热门文章