创建一个交叉验证函数来确定每个预测模型的均方根误差(RMSPE)

2024-04-16 12:17:44 发布

您现在位置:Python中文网/ 问答频道 /正文

我在创建python函数方面是新手。我试图提出一个交叉有效的函数来确定不同预测模型的均方根误差百分比(RMSPE)。附件是我的代码片段:

y = mod_coal.loc[:,"Replacement_cost_USD"]
mod_coal = mod_coal.drop('Replacement_cost_USD', axis = 1)

###Do a cross valiation function for model accuracy
def cross_valid(n_cv_estimates, method):

    CV_list = np.repeat(0, n_cv_estimates) 
    for k in range(1,n_cv_estimates):
        mod_coal_train, mod_coal_test, y_train, y_test = train_test_split(mod_coal, y, test_size = 0.2, random_state = 4240)

        if method == "LM":
            #Fit a simple linear regression model
            lm = LinearRegression()
            lm_model = lm.fit(mod_coal_train,y_train)
            lm_predictions = lm.predict(mod_coal_test)
            RMSPE = np.sqrt(np.mean((lm_predictions - y_test)/y_test)**2)
            CV_list[k] = RMSPE

        elif method == 'Ridge':
            #Fit a Ridge regression model

            #Find optimal alpha
            ridge_parameters = RidgeCV(alphas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1, 5, 10, 20])
            ridge_param_model = ridge_parameters.fit(mod_coal_train, y_train)

            #Optimal alpha is 5 for ridge regression
            ridge = Ridge(alpha=5.0)
            ridge_model = ridge.fit(mod_coal_train, y_train)
            ridge_predictions = ridge.predict(mod_coal_test)
            RMSPE = np.sqrt(np.mean((ridge_predictions - y_test)/y_test)**2)
            CV_list[k] = RMSPE

        elif method == 'Lasso':
            #Fit a LASSO regression model

            #Find optimal alpha
            lasso_parameters = LassoCV(alphas=None, cv=10, max_iter = 1000, normalize=True)
            lasso_param_model = lasso_parameters.fit(mod_coal_train, y_train)

            #Optimal alpha is 215961 for Lasso regression
            lasso = Lasso(alpha=215961) 
            lasso_model = lasso.fit(mod_coal_train, y_train)
            lasso_predictions = lasso.predict(mod_coal_test)
            RMSPE = np.sqrt(np.mean((lasso_predictions - y_test)/y_test)**2)
            CV_list[k] = RMSPE

        elif method == "randomForest":

            #Fit a randomforest model
            rf = RandomForestRegressor(n_estimators = 1000, random_state = 4240)
            rf_model = rf.fit(mod_coal_train, y_train)
            rf_predictions = rf.predict(mod_coal_test)
            RMSPE = np.sqrt(np.mean((rf_predictions - y_test)/y_test)**2)
            CV_list[k] = RMSPE

        elif method == "gbm":

            #Fit a gradient boosting model
            gbm = GradientBoostingRegressor(n_estimators = 1000)
            gbm_model = gbm.fit(mod_coal_train, y_train)
            gbm_predictions = gbm.predict(mod_coal_test)
            RMSPE = np.sqrt(np.mean((gbm_predictions - y_test)/y_test)**2)
            CV_list[k] = RMSPE

        return CV_list 

random.seed(4240)
lm_cv = cross_valid(1000000,"LM")
ridge_cv = cross_valid(1000000, "Ridge")
lasso_cv = cross_valid(1000000, "Lasso")
rf_cv = cross_valid(1000000, "randomForest")
gbm_cv = cross_valid(1000000, "gbm")

print(lm_cv)
print(ridge_cv)
print(lasso_cv)
print(rf_cv)
print(gbm_cv)

实际上,我希望在CV\u list变量中附加一个RMSPE值列表。但是,当我试图打印lmïcv、ridgeïcv、lassoïcv、rfïcv和gbmïcv时,得到的列表是0。我现在不知道我的代码哪里出错了。感谢您的帮助。你知道吗


Tags: testmodmodelnptraincvlmridge