使用自定义分类器时GridSearchCV出现问题

class OwnClassifier(BaseEstimator, ClassifierMixin):
    """Wrap a classifier so it can be trained on a binned continuous target.

    The target passed to ``fit`` is discretized into 4 ordinal bins with a
    ``KBinsDiscretizer`` and the wrapped estimator is trained on those bins.
    """

    def __init__(self, estimator=None):
        """Store the wrapped estimator and create the target discretizer.

        Args:
            estimator: Classifier to train on the binned target. When
                ``None``, ``LogisticRegression(solver='liblinear')`` is used.
        """
        self.yt = None
        self.estimator = (
            estimator
            if estimator is not None
            else LogisticRegression(solver='liblinear')
        )
        self.discr = KBinsDiscretizer(n_bins=4, encode='ordinal')

    def fit(self, X, y):
        """Bin ``y``, remember the binned target and fit the estimator.

        Args:
            X: Training features, forwarded unchanged to the estimator.
            y: Target values; must provide ``copy()`` and ``reshape()``.

        Returns:
            This instance.
        """
        self.yt = y.copy()
        binned = self.discr.fit_transform(self.yt.reshape(-1, 1))
        # Kept as a column on the instance; ``score`` reads it later.
        self.yt = binned.astype(int)
        self.estimator.fit(X, self.yt.ravel())
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's class probabilities for ``X``."""
        return self.estimator.predict_proba(X)

    def score(self, X, y=None):
        """Return the accuracy of the predictions for ``X``.

        NOTE: ``y`` is not used. The predictions are compared against the
        binned *training* target stored in ``self.yt`` by ``fit``, so ``X``
        must contain as many rows as the data passed to ``fit``.
        """
        predictions = self.predict(X)
        return accuracy_score(self.yt, predictions)

1条回答

网友

1楼 · 发布于 2024-05-15 00:16:04

问题出在 score 方法上：它忽略了传入的 y，始终使用训练时保存的 self.yt 来计算准确率。在交叉验证中，score 是在验证集上调用的，预测结果的长度与 self.yt（训练集的长度）不一致，因此回溯（traceback）才会提示形状不兼容。下面的代码修复了这个问题：

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import load_boston
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import KBinsDiscretizer

class OwnClassifier(BaseEstimator, ClassifierMixin):
    """Classifier trained on a discretized version of a continuous target.

    The target is mapped to 4 ordinal bins by a ``KBinsDiscretizer`` that is
    fitted in ``fit``; the wrapped estimator is trained on those bin labels,
    and ``score`` bins the supplied target with the same fitted discretizer
    before computing accuracy.
    """

    def __init__(self, estimator=None):
        """Store the wrapped estimator and create the target discretizer.

        Args:
            estimator: Classifier to train on the binned target. When
                ``None``, ``LogisticRegression(solver='liblinear')`` is used.
        """
        if estimator is None:
            estimator = LogisticRegression(solver='liblinear')

        self.estimator = estimator
        self.discr = KBinsDiscretizer(n_bins=4, encode='ordinal')

    @staticmethod
    def _as_column(y):
        """Return ``y`` as a single-column 2-D array, as the discretizer expects."""
        # np.asarray lets lists and pandas Series through, not only ndarrays.
        return np.asarray(y).reshape(-1, 1)

    def _bin_target(self, y):
        """Return ``y`` converted to a 1-D array of integer bin labels."""
        return self.discr.transform(self._as_column(y)).astype(int).ravel()

    def fit(self, X, y):
        """Fit the discretizer on ``y`` and the estimator on the binned target.

        Args:
            X: Training features, forwarded unchanged to the estimator.
            y: Continuous target values (array-like, one value per sample).

        Returns:
            This instance, so calls can be chained.
        """
        # fit the discretizer
        self.discr.fit(self._as_column(y))

        # fit the model on the transformed target
        self.estimator.fit(X, self._bin_target(y))

        return self

    def predict(self, X):
        """Return the predicted bin labels for ``X``."""
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's class probabilities for ``X``."""
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        """Return the accuracy of the predictions for ``X`` against binned ``y``.

        Args:
            X: Features to predict on.
            y: Continuous target values for ``X``; they are binned with the
                discretizer fitted in ``fit`` before being compared.

        Returns:
            The value of ``accuracy_score`` for the binned target and the
            predictions.
        """
        return accuracy_score(self._bin_target(y), self.predict(X))

# Load the example dataset and split it into features and target.
boston_data = load_boston()
X, y = boston_data['data'], boston_data['target']

# Candidate values for the wrapped estimator's ``C`` parameter; the
# ``estimator__`` prefix routes the value to ``OwnClassifier.estimator``.
grid = [dict([('estimator__C', [1, 10, 100, 1000])])]

# Run the grid search over the custom classifier.
myLogi = OwnClassifier()
gridCv = GridSearchCV(estimator=myLogi, param_grid=grid)
gridCv.fit(X, y)

相关问题更多 >

编程相关推荐

热门问题

热门文章