我能在使用鲁棒定标器后对套索回归的截距和系数进行反变换吗？

import pandas as pd from sklearn.preprocessing import RobustScaler from sklearn.linear_model import Lasso df = pd.DataFrame({'Y':[5, -10, 10, .5, 2.5, 15], 'X1':[1., -2., 2., .1, .5, 3], 'X2':[1, 1, 2, 1, 1, 1], 'X3':[6, 6, 6, 5, 6, 4], 'X4':[6, 5, 4, 3, 2, 1]}) X = df[['X1','X2', 'X3' ,'X4']] y = df[['Y']] #Scaling transformer_x = RobustScaler().fit(X) transformer_y = RobustScaler().fit(y) X_scal = transformer_x.transform(X) y_scal = transformer_y.transform(y) #LASSO lasso = Lasso() lasso = lasso.fit(X_scal, y_scal) def pred_val(X1,X2,X3,X4): print('X1 entered: ', X1) #Scale X value that user entered - by hand med_X = X.median() Q1_X = X.quantile(0.25) Q3_X = X.quantile(0.75) IQR_X = Q3_X - Q1_X X_scaled = (X1 - med_X)/IQR_X print('X1 scaled by hand: ', X_scaled[0].round(2)) #Scale X value that user entered - by function X_scaled2 = transformer_x.transform(np.array([[X1,X2]])) print('X1 scaled by function: ', X_scaled2[0][0].round(2)) #Intercept by hand med_y = y.median() Q1_y = y.quantile(0.25) Q3_y = y.quantile(0.75) IQR_y = Q3_y - Q1_y inv_int = med_y + IQR_y*lasso.intercept_[0] #Intercept by function inv_int2 = transformer_y.inverse_transform(lasso.intercept_.reshape(-1, 1))[0][0] #Coefficient by hand inv_coef = lasso.coef_[0]*IQR_y #Coefficient by function inv_coef2 = transformer_x.inverse_transform(reg.coef_.reshape(1,-1))[0] #Prediction by hand preds = inv_int + inv_coef*X_scaled[0] #Prediction by function preds_inner = lasso.predict(X_scaled2) preds_f = transformer_y.inverse_transform(preds_inner.reshape(-1, 1))[0][0] print('\nIntercept by hand: ', inv_int[0].round(2)) print('Intercept by function: ', inv_int2.round(2)) print('\nCoefficients by hand: ', inv_coef[0].round(2)) print('Coefficients by function: ', inv_coef2[0].round(2)) print('\nYour predicted value by hand is: ', preds[0].round(2)) print('Your predicted value by function is: ', preds_f.round(2)) print('Perfect Prediction would be 80') pred_val(10,1,1,1)

Out[1]: X1 entered: 10 X1 scaled by hand: 5.97 X1 scaled by function: 5.97 Intercept by hand: 34.19 Intercept by function: 34.19 Coefficients by hand: 7.6 Coefficients by function: 7.6 Your predicted value by hand is: 79.54 Your predicted value by function is: 79.54 Perfect Prediction would be 80

1条回答

网友

1楼 · 发布于 2024-04-26 12:11:39

基于链接的SO线程，您所要做的就是获得未缩放的预测值。对吗？在

如果是，那么您需要做的就是：

# Scale the test dataset
X_test_scaled = transformer_x.transform(X_test)

# Predict with the trained model
prediction = lasso.predict(X_test_scaled)

# Inverse transform the prediction
prediction_in_dollars = transformer_y.inverse_transform(prediction)

更新：

假设列车数据只包含一个名为X的特性。下面是RobustScaler将要做的：

^{pr2}$

然后，套索回归将给出如下预测：

a * X_scaled + b = y_scaled

您必须计算出方程式，以查看未缩放数据上的模型系数：

# Substituting X_scaled and y_scaled from the 1st equation
# In this equation `median(X), IQR(X), median(y) and IQR(y) are plain numbers you already know from the training phase
a * (X - median(X))/IQR(X) + b = (y - median(y))/IQR(y)

如果你试着用这个来做一个a_new * x + b_new = y式的等式，你会得到：

a_new = (a * (X - median(X)) / (X * IQR(X))) * IQR(y)
b_new = b * IQR(y) + median(y)
a_new * X + b_new = y

您可以看到，未标度系数（a_new）依赖于X。因此，您可以使用unscaledX直接进行预测，但在这两者之间，您是间接地应用转换。在

更新2

我已经修改了你的代码，现在它展示了如何获得原始比例的系数。这个脚本只是我上面展示的公式的实现。在

import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import Lasso

df = pd.DataFrame({'Y':[5, -10, 10, .5, 2.5, 15], 'X1':[1., -2.,  2., .1, .5, 3], 'X2':[1, 1, 2, 1, 1, 1],
              'X3':[6, 6, 6, 5, 6, 4], 'X4':[6, 5, 4, 3, 2, 1]})

X = df[['X1','X2','X3','X4']]
y = df[['Y']]

#Scaling
transformer_x = RobustScaler().fit(X)
transformer_y = RobustScaler().fit(y)
X_scal = transformer_x.transform(X)
y_scal = transformer_y.transform(y)

#LASSO
lasso = Lasso()
lasso = lasso.fit(X_scal, y_scal)

def pred_val(X_test):

    print('X entered: ',)
    print (X_test.values[0])

    #Scale X value that user entered - by hand
    med_X = X.median()
    Q1_X = X.quantile(0.25)
    Q3_X = X.quantile(0.75)
    IQR_X = Q3_X - Q1_X
    X_scaled = ((X_test - med_X)/IQR_X).fillna(0).values
    print('X_test scaled by hand: ',)
    print (X_scaled[0])

    #Scale X value that user entered - by function
    X_scaled2 = transformer_x.transform(X_test)
    print('X_test scaled by function: ',)
    print (X_scaled2[0])

    #Intercept by hand
    med_y = y.median()
    Q1_y = y.quantile(0.25)
    Q3_y = y.quantile(0.75)
    IQR_y = Q3_y - Q1_y

    a = lasso.coef_
    coef_new = ((a * (X_test - med_X).values) / (X_test * IQR_X).values) * float(IQR_y)
    coef_new = np.nan_to_num(coef_new)[0]

    b = lasso.intercept_[0]
    intercept_new = b * float(IQR_y) + float(med_y)

    custom_pred = sum((coef_new * X_test.values)[0]) + intercept_new

    pred = lasso.predict(X_scaled2)
    final_pred = transformer_y.inverse_transform(pred.reshape(-1, 1))[0][0]


    print('Original intercept: ', lasso.intercept_[0].round(2))
    print('New intercept: ', intercept_new.round(2))
    print('Original coefficients: ', lasso.coef_.round(2))
    print('New coefficients: ', coef_new.round(2))
    print('Your predicted value by function is: ', final_pred.round(2))
    print('Your predicted value by hand is: ', custom_pred.round(2))


X_test = pd.DataFrame([10,1,1,1]).T
X_test.columns = ['X1', 'X2', 'X3', 'X4']

pred_val(X_test)

您可以看到自定义预测使用原始值（X_test.values）。在

结果：

X entered: 
[10  1  1  1]

X_test scaled by hand: 
[ 5.96774194  0.         -6.66666667 -1.        ]
X_test scaled by function: 
[ 5.96774194  0.         -6.66666667 -1.        ]

Original intercept:  0.01
New intercept:  3.83

Original coefficients:  [ 0.02  0.   -0.   -0.  ]
New coefficients:  [0.1 0.  0.  0. ]

Your predicted value by function is:  4.83
Your predicted value by hand is:  4.83

如上所述，新系数取决于X_test。这意味着您不能将它们的当前值用于另一个测试样本。对于不同的输入，它们的值将不同。在

相关问题更多 >

编程相关推荐

热门问题

热门文章