如何使用曲线拟合的交叉检验？

1条回答

网友

1楼 · 发布于 2024-05-01 21:27:07

我发现我在数组维度上犯了两个错误。因为它们是同时出现的，所以很难逐一追查。我把答案贴在这里，也许以后会有用。

1. 来自文档的示例

修改官方文档（documentation）中的示例有助于追查维度错误：

from sklearn import datasets, linear_model
from sklearn.model_selection import cross_validate, cross_val_score

# Use the first 150 samples of the diabetes dataset as a small regression problem.
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
lasso = linear_model.Lasso()

# Cross-validate the Lasso estimator with cv=3.
cv_results = cross_validate(lasso, X, y, cv=3)
# The sorted keys are only displayed in an interactive session; the result is not stored.
sorted(cv_results.keys())

# Print the 'test_score' entry of the results.
print( cv_results['test_score'] )

[0.33150734 0.08022311 0.03531764]

注意 cross_validate 要求 X 和 y 的第一个维度（样本数）相同：

# Show the shapes of X and y so their first dimensions can be compared.
print( X.shape, y.shape )

(150, 10) (150,)

注意，对于这些新维度，调用curve_fit的简单方法会抛出一个错误：

def lincomb( X, a, b ):
    """Evaluate the model ``a * X[0] * X[1] + b``.

    Only the first two entries along the first axis of ``X`` are used;
    ``a`` scales their product and ``b`` is added as an offset.
    """
    # Multiply left to right, exactly as in ``a*x1*x2``.
    scaled_first = a * X[0]
    return scaled_first * X[1] + b

# NOTE(review): curve_fit (scipy.optimize) and x_data / y_data are defined outside this snippet.
# x_data is passed as-is here; presumably it is laid out as (n_samples, 2), so
# X[0] inside lincomb is one sample rather than one feature column -- confirm.
popt, pcov = curve_fit( lincomb, x_data, y_data )
print( popt )

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-dedaa241e377> in <module>
      4     return a*x1*x2 + b
      5 
  > 6 popt, pcov = curve_fit( lincomb, x_data, y_data )
      7 print( popt )

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
    754         # Remove full_output from kwargs, otherwise we're passing it in twice.
    755         return_full = kwargs.pop('full_output', False)
 > 756         res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
    757         popt, pcov, infodict, errmsg, ier = res
    758         cost = np.sum(infodict['fvec'] ** 2)

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
    381     if not isinstance(args, tuple):
    382         args = (args,)
 > 383     shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
    384     m = shape[0]
    385 

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
     24 def _check_func(checker, argname, thefunc, x0, args, numinputs,
     25                 output_shape=None):
 -> 26     res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
     27     if (output_shape is not None) and (shape(res) != output_shape):
     28         if (output_shape[0] != 1):

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in func_wrapped(params)
    456     if transform is None:
    457         def func_wrapped(params):
 > 458             return func(xdata, *params) - ydata
    459     elif transform.ndim == 1:
    460         def func_wrapped(params):

ValueError: operands could not be broadcast together with shapes (2,) (10,)

这可以通过在对curve_fit的调用中再次转置来解决：

# Pass the transposed x_data so that X[0] and X[1] inside lincomb index the
# other axis (presumably the two feature columns -- confirm against x_data's layout).
popt, pcov = curve_fit( lincomb, x_data.T, y_data )
print( popt )

[-0.17857143 -1.57142857]

2. 类

让 x_data 采用 cross_validate 所要求的新维度（并使用问题中定义的类）会引发另一个错误：

from sklearn.model_selection import cross_validate

class LinComb:
    """Estimator-style wrapper around the model ``y = a * x1 * x2 + b``.

    This version indexes ``X`` along its first axis (``X[0]``, ``X[1]``) in
    both ``fit`` and ``predict``, i.e. it expects the two features as the
    first two rows of ``X``. No transposition is applied.
    """

    def __init__( self, a=None, b=None ):
        self.a = a
        self.b = b

    def _lincomb_background(self, X, a, b):
        """Return ``a * X[0] * X[1] + b``."""
        x1 = X[0]
        x2 = X[1]
        return a*x1*x2 + b

    def predict( self, X ):
        """Evaluate the model on ``X`` with the stored ``a`` and ``b``."""
        return self._lincomb_background( X, self.a, self.b )

    def fit( self, X, y ):
        """Fit ``a`` and ``b`` to ``(X, y)`` with ``curve_fit`` and return ``self``."""
        from scipy.optimize import curve_fit
        popt, pcov = curve_fit( self._lincomb_background, X, y )
        self.a = popt[0]
        self.b = popt[1]
        return self

    def get_params( self, deep=False ):
        """Return the parameters as a dict; ``deep`` is accepted but unused."""
        return { 'a':self.a, 'b':self.b }

    def set_params( self, **parameters ):
        """Set each keyword argument as an attribute and return ``self``."""
        # Fixed: the original called the non-existent ``parameters.intems()``,
        # which raised AttributeError on every call.
        for parameter, value in parameters.items():
            setattr( self, parameter, value )
        return self


# Cross-validate LinComb with cv=5 and the 'neg_mean_squared_error' scorer
# (x_data / y_data are defined outside this snippet).
cross_validate( LinComb(), x_data, y_data, cv=5, scoring='neg_mean_squared_error' )

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-10-e0ff8bb83213> in <module>
  > 1 cross_validate( LinComb(), x_data, y_data, cv=5, scoring='neg_mean_squared_error' )

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    229             return_times=True, return_estimator=return_estimator,
    230             error_score=error_score)
 > 231         for train, test in cv.split(X, y, groups))
    232 
    233     zipped_scores = list(zip(*scores))

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
    919             # remaining jobs.
    920             self._iterating = False
 > 921             if self.dispatch_one_batch(iterator):
    922                 self._iterating = self._original_iterator is not None
    923 

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
 > 759                 self._dispatch(tasks)
    760                 return True
    761 

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
 > 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

/usr/local/lib/python3.6/dist-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
 > 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

/usr/local/lib/python3.6/dist-packages/joblib/_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
 > 549         self.results = batch()
    550 
    551     def get(self):

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
 > 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
 > 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    512             estimator.fit(X_train, **fit_params)
    513         else:
 > 514             estimator.fit(X_train, y_train, **fit_params)
    515 
    516     except Exception as e:

<ipython-input-9-ff88060f1729> in fit(self, X, y)
     15     def fit( self, X, y ):
     16         from scipy.optimize import curve_fit
 -> 17         popt, pcov = curve_fit( self._lincomb_background, X, y )
     18         self.a = popt[0]
     19         self.b = popt[1]

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
    754         # Remove full_output from kwargs, otherwise we're passing it in twice.
    755         return_full = kwargs.pop('full_output', False)
 > 756         res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
    757         popt, pcov, infodict, errmsg, ier = res
    758         cost = np.sum(infodict['fvec'] ** 2)

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
    381     if not isinstance(args, tuple):
    382         args = (args,)
 > 383     shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
    384     m = shape[0]
    385 

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
     24 def _check_func(checker, argname, thefunc, x0, args, numinputs,
     25                 output_shape=None):
 -> 26     res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
     27     if (output_shape is not None) and (shape(res) != output_shape):
     28         if (output_shape[0] != 1):

/usr/local/lib/python3.6/dist-packages/scipy/optimize/minpack.py in func_wrapped(params)
    456     if transform is None:
    457         def func_wrapped(params):
 > 458             return func(xdata, *params) - ydata
    459     elif transform.ndim == 1:
    460         def func_wrapped(params):

ValueError: operands could not be broadcast together with shapes (2,) (8,)

3. 类内部的维度错误

此错误来自 curve_fit，而不是 cross_validate，必须在类的内部、即调用模型 _lincomb_background() 的 fit() 和 predict() 两个函数中更正。修改后的类为：

class LinComb:
    """Estimator-style wrapper around the model ``y = a * x1 * x2 + b``.

    ``fit`` and ``predict`` transpose ``X`` before handing it to the model,
    so callers pass ``X`` with the two features along the second axis
    (one row per sample) and the model indexes them as ``X.T[0]`` and
    ``X.T[1]``.
    """

    def __init__( self, a=None, b=None ):
        self.a = a
        self.b = b

    def _lincomb_background(self, X, a, b):
        """Return ``a * X[0] * X[1] + b`` for an already-transposed ``X``."""
        x1 = X[0]
        x2 = X[1]
        return a*x1*x2 + b

    def predict( self, X ):
        """Evaluate the model on ``X`` with the stored ``a`` and ``b``."""
        return self._lincomb_background( X.T, self.a, self.b ) # Call with transposed X!

    def fit( self, X, y ):
        """Fit ``a`` and ``b`` to ``(X, y)`` with ``curve_fit`` and return ``self``."""
        from scipy.optimize import curve_fit
        popt, pcov = curve_fit( self._lincomb_background, X.T, y ) # Call with transposed X!
        self.a = popt[0]
        self.b = popt[1]
        return self

    def get_params( self, deep=False ):
        """Return the parameters as a dict; ``deep`` is accepted but unused."""
        return { 'a':self.a, 'b':self.b }

    def set_params( self, **parameters ):
        """Set each keyword argument as an attribute and return ``self``."""
        # Fixed: the original called the non-existent ``parameters.intems()``,
        # which raised AttributeError on every call.
        for parameter, value in parameters.items():
            setattr( self, parameter, value )
        return self

经过这两处调用的修改后，cross_validate 可以按预期工作：

# Cross-validate LinComb with cv=5 and the 'neg_mean_squared_error' scorer
# (x_data / y_data are defined outside this snippet).
cross_validate( LinComb(), x_data, y_data, cv=5, scoring='neg_mean_squared_error' )

{'fit_time': array([0.00105524, 0.00051618, 0.0004158 , 0.00040078, 0.00039887]), 'score_time': array([0.00158715, 0.0001812 , 0.00017715, 0.00017595, 0.00017548]), 'test_score': array([-12.89 , -0.29918379, -3.82378685, -2.72051908, -7.25 ])}

4. 总结

a）首先检查传给 cross_validate() 的数据维度是否正确

b）然后在调用curve_fit()时调整类内的维度

c）最后在类内的 predict() 中同样调整维度

相关问题更多 >

编程相关推荐

热门问题

热门文章