队列。空通过交叉验证

2024-04-23 17:24:47 发布

您现在位置:Python中文网/ 问答频道 /正文

我在google colab上使用cross_validation_predict,用ComplementNB获得queue.Empty
代码:

import pandas as pd
import numpy as np
import torch
from torch.nn.functional import one_hot

from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes     import MultinomialNB
from sklearn.naive_bayes     import ComplementNB

class CustomNB:
    def fit(self, X,y):
        self.clf.fit(X, y)

    def predict(self, X):
        return self.clf.predict(X)

    def get_params(self, deep=True):
        return self.clf.get_params(deep)

    def set_params(self, **params):
        self.clf.set_params(params)


class CustomMultinomialNB(CustomNB):
    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
        self.clf = MultinomialNB(alpha, fit_prior, class_prior)

class CustomComplementNB(CustomNB):
    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
        self.clf = ComplementNB(alpha, fit_prior, class_prior)


np.random.seed(0)
nrs_df = pd.DataFrame({'user_nr':list(range(60)),
                       'path_nr':[i%30 for i in range(60)]})

acc =[]
for _, df in nrs_df.groupby('user_nr', sort=False):
     l=df['path_nr'].tolist()
     row = one_hot(torch.LongTensor(l), num_classes=200).sum(axis=0)
     acc.append(row)

X = torch.stack(acc, axis=0).numpy()
y = np.random.choice(list("mf"), len(nrs_df['user_nr'].unique()))

#these work
clf = ComplementNB()
clf = MultinomialNB()
clf = CustomMultinomialNB()
#this one does not
clf = CustomComplementNB()

p = cross_val_predict(clf, X, y, cv=10)

使用CustomComplementNB这会产生:


Empty                                     Traceback (most recent call last)

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
    795             try:
--> 796                 tasks = self._ready_batches.get(block=False)
    797             except queue.Empty:

6 frames

/usr/lib/python3.6/queue.py in get(self, block, timeout)
    160                 if not self._qsize():
--> 161                     raise Empty
    162             elif timeout is None:

Empty: 


During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)

<ipython-input-27-8325406c36f4> in <module>()
     52 clf = CustomComplementNB()
     53 
---> 54 p = cross_val_predict(clf, X, y, cv=10)

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py in cross_val_predict(estimator, X, y, groups, cv, n_jobs, verbose, fit_params, pre_dispatch, method)
    787     prediction_blocks = parallel(delayed(_fit_and_predict)(
    788         clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 789         for train, test in cv.split(X, y, groups))
    790 
    791     # Concatenate the predictions

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
   1001             # remaining jobs.
   1002             self._iterating = False
-> 1003             if self.dispatch_one_batch(iterator):
   1004                 self._iterating = self._original_iterator is not None
   1005 

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
    805                 big_batch_size = batch_size * n_jobs
    806 
--> 807                 islice = list(itertools.islice(iterator, big_batch_size))
    808                 if len(islice) == 0:
    809                     return False

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py in <genexpr>(.0)
    787     prediction_blocks = parallel(delayed(_fit_and_predict)(
    788         clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 789         for train, test in cv.split(X, y, groups))
    790 
    791     # Concatenate the predictions

/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
     63     for name, param in new_object_params.items():
     64         new_object_params[name] = clone(param, safe=False)
---> 65     new_object = klass(**new_object_params)
     66     params_set = new_object.get_params(deep=False)
     67 

TypeError: __init__() got an unexpected keyword argument 'norm'

创建这些子类的原因是我最初需要包装器来在引擎盖下使用partial_fit进行训练。对于调试,我简化了它,这样包装器基本上什么都不做。你知道吗


Tags: inpyimportselflibusrdefparams