sklearn中GridSearchCV的自定义估计器

2024-05-14 06:31:44 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试实现一个自定义估计器,并使用GridSearchCV优化四个参数。我还没有完全理解这一点,并且遇到了一些问题(请参见下面的更新)

这是我的估计器,其中coeffs包含我要优化的四个参数,v1、v2、sm是输入数据(NumPy数组):

from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np

class wc(BaseEstimator, RegressorMixin):
    """Closed-form model evaluated from four coefficients supplied per call.

    The coefficients are not stored on the estimator: both ``fit`` and
    ``predict`` receive them through the ``coeffs`` argument.

    NOTE(review): these ``fit``/``predict`` signatures do not follow the
    ``fit(X, y)`` / ``predict(X)`` convention, and the only constructor
    parameter is ``verbose`` -- confirm this is compatible with whatever
    drives the estimator.
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def fit(self, coeffs, v1, v2, sm):
        """Do nothing and return ``self``; no state is learned from the inputs."""
        return self

    def predict(self, coeffs, v1, v2, sm):
        """Evaluate the model for the given coefficients and inputs.

        Parameters
        ----------
        coeffs : sequence
            The coefficients ``a, b, c, d`` at indices 0 to 3.
        v1, v2, sm : scalar or numpy array
            Model inputs; combined element-wise by the formula below.

        Returns
        -------
        ``sigmav + t**2 * sigmas`` where ``t = exp(-2 * b * v2 * theta_rad)``,
        ``sigmav = a * v1 * cos(theta_rad) * (1 - t**2)`` and
        ``sigmas = c + d * sm``.
        """
        a = coeffs[0]
        b = coeffs[1]
        c = coeffs[2]
        d = coeffs[3]

        theta = 40.0  # degrees
        # NumPy trigonometric functions expect radians, so convert once and
        # use the converted value in every trigonometric call.
        theta_rad = theta * np.pi / 180

        # NOTE(review): arccos(cos(x)) equals x for x in [0, pi], so this
        # factor is just theta_rad -- confirm that is the intended term.
        t = np.exp(-2 * b * v2 * np.arccos(np.cos(theta_rad)))
        sigmav = a * v1 * np.cos(theta_rad) * (1 - t ** 2)

        sigmas = c + d * sm
        sigma = sigmav + t ** 2 * sigmas
        return sigma
    
    
# Module-level estimator instance built with the default constructor arguments.
wcm = wc()

现在我定义了一个函数来执行网格搜索:

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

def wcm_params_selection(v1, v2, sm, y, nfolds):
    """Run a grid search over ``a``, ``b``, ``c`` and ``d`` and return the best set.

    Each coefficient is scanned in steps of 0.1: ``a``, ``b`` and ``d`` over
    [0, 10) and ``c`` over [-10, 0). Candidates are scored with the negated
    mean squared error using ``nfolds`` cross-validation folds.

    Note that the search is started with keyword arguments ``v1``, ``v2``,
    ``sm`` and ``y`` only; no positional ``X`` is passed to ``fit``.

    Returns
    -------
    The ``best_params_`` attribute of the fitted ``GridSearchCV`` object.
    """
    step = 0.1
    param_grid = {
        'a': np.arange(0.0, 10.0, step),
        'b': np.arange(0.0, 10.0, step),
        'c': np.arange(-10.0, 0, step),
        'd': np.arange(0.0, 10.0, step),
    }

    # greater_is_better=False makes the scorer return the negated MSE.
    neg_mse = make_scorer(mean_squared_error, greater_is_better=False)

    searcher = GridSearchCV(wcm, param_grid, scoring=neg_mse, cv=nfolds)
    searcher.fit(v1=v1, v2=v2, sm=sm, y=y)
    return searcher.best_params_

运行

# Single-sample test inputs, one value per predictor.
v1 = np.array([5.0])
v2 = np.array([5.0])
sm = np.array([0.5])

# Measured target value for that sample.
y = np.array([-10])

wcm_params_selection(v1=v1, v2=v2, sm=sm, y=y, nfolds=5)

运行后报错:TypeError: fit() missing 1 required positional argument: 'X'

更新: 我对代码做了大量修改,感觉已经越来越接近正确了。如果能得到一些关于其正确性的反馈,那就太好了。对于某些测试数据,最终得到的a和b似乎始终为0

估计器类:

class wc(BaseEstimator, RegressorMixin):
    """Regressor whose four coefficients are constructor parameters.

    ``fit`` validates its input and marks the estimator as fitted but does not
    change ``a``, ``b``, ``c`` or ``d``; ``predict`` evaluates a closed-form
    expression using whatever values those attributes currently hold.

    Parameters
    ----------
    a, b, c, d
        Coefficients of the model evaluated in ``predict``.
    """

    def __init__(self, a, b, c, d):
        self.a = a
        self.b = b
        self.c = c
        self.d = d

    def fit(self, X, y):
        """Validate ``X`` and ``y`` and flag the estimator as fitted.

        No coefficient is estimated here. Returns ``self``.
        """
        # check_X_y is not imported in the visible snippet; presumably it is
        # sklearn.utils.validation.check_X_y -- confirm the import exists.
        X, y = check_X_y(X, y)
        self.is_fitted_ = True
        return self

    def predict(self, X):
        """Evaluate the model row-wise on ``X``.

        Parameters
        ----------
        X : 2-D array
            Column 0 is used for both ``v1`` and ``v2``; column 1 is ``sm``.

        Returns
        -------
        ``sigmav + t**2 * sigmas`` where ``t = exp(-2 * b * v2 * theta_rad)``,
        ``sigmav = a * v1 * cos(theta_rad) * (1 - t**2)`` and
        ``sigmas = c + d * sm``.
        """
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')

        # NOTE(review): v1 and v2 both read column 0. Confirm that a single
        # shared predictor is intended rather than separate columns.
        v1 = X[:, 0]
        v2 = X[:, 0]
        sm = X[:, 1]

        theta = 40.0  # degrees
        # NumPy trigonometric functions expect radians, so convert once and
        # use the converted value in every trigonometric call.
        theta_rad = theta * np.pi / 180

        # NOTE(review): arccos(cos(x)) equals x for x in [0, pi], so this
        # factor is just theta_rad -- confirm that is the intended term.
        t = np.exp(-2 * self.b * v2 * np.arccos(np.cos(theta_rad)))

        # When b == 0, t == 1 and (1 - t**2) == 0, so `a` has no influence on
        # the prediction; every value of `a` then yields the same result.
        sigmav = self.a * v1 * np.cos(theta_rad) * (1 - np.power(t, 2))

        sigmas = self.c + self.d * sm
        sigma = sigmav + np.power(t, 2) * sigmas
        return sigma

网格搜索函数:

def wcm_params_selection(X, y, nfolds):
    """Grid-search the coefficients ``a``, ``b``, ``c``, ``d`` of ``wc``.

    Parameters
    ----------
    X, y
        Training data forwarded to ``GridSearchCV.fit``.
    nfolds : int
        Number of cross-validation folds used by the search.

    Returns
    -------
    The ``best_params_`` attribute of the fitted ``GridSearchCV`` object.
    """
    a = np.arange(0.0, 5.0, 1)
    b = np.arange(0.0, 10.0, 1)
    c = np.arange(-35, 0, 2)
    d = np.arange(15, 60, 1)

    param_grid = {'a': a, 'b': b, 'c': c, 'd': d}

    # greater_is_better=False makes the scorer return the negated MSE.
    my_func = make_scorer(mean_squared_error, greater_is_better=False)

    # GridSearchCV expects an estimator *instance*, not the class itself.
    # The constructor values are placeholders: the search overwrites them
    # with each combination from param_grid.
    estimator = wc(a=a[0], b=b[0], c=c[0], d=d[0])

    # Honour the caller's fold count instead of a hard-coded value.
    grid_search = GridSearchCV(estimator, param_grid, scoring=my_func, cv=nfolds)
    grid_search.fit(X, y)
    return grid_search.best_params_

以及拟合模型并预测值:

# Select the coefficients by grid search, requesting 5 folds.
wcm_params = wcm_params_selection(X, y, 5)

# Build the estimator from the selected coefficients and fit it;
# fit() returns the estimator itself.
wcmfit = wc(
    a=wcm_params['a'],
    b=wcm_params['b'],
    c=wcm_params['c'],
    d=wcm_params['d'],
).fit(X, y)

# Predict on, and score against, the same data used for fitting.
out = wcmfit.predict(X)
score = wcmfit.score(X, y)

不幸的是,我完全没有头绪:a和b总是得到0,这显然不正确。任何帮助都将不胜感激


Tags: self, search, return, def, np, params, grid, fit