以下是完整的错误消息:
AttributeErrorTraceback (most recent call last) in () 24 25 # train ---> 26 pipe.fit(train1, labelsTrain1) 27 28 # test
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in fit(self, X, y, **fit_params) 246 This estimator 247 """ --> 248 Xt, fit_params = self._fit(X, y, **fit_params) 249 if self._final_estimator is not None: 250 self._final_estimator.fit(Xt, y, **fit_params)
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in _fit(self, X, y, **fit_params) 211 Xt, fitted_transformer = fit_transform_one_cached( 212 cloned_transformer, None, Xt, y, --> 213 **fit_params_steps[name]) 214 # Replace the transformer of the step with the fitted 215 # transformer. This is necessary when loading the transformer
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\externals\joblib\memory.pyc in call(self, *args, **kwargs) 360 361 def call(self, *args, **kwargs): --> 362 return self.func(*args, **kwargs) 363 364 def call_and_shelve(self, *args, **kwargs):
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in _fit_transform_one(transformer, weight, X, y, **fit_params) 579 **fit_params): 580 if hasattr(transformer, 'fit_transform'): --> 581 res = transformer.fit_transform(X, y, **fit_params) 582 else: 583 res = transformer.fit(X, y, **fit_params).transform(X)
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y) 867 868 vocabulary, X = self._count_vocab(raw_documents, --> 869 self.fixed_vocabulary_) 870 871 if self.binary:
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab) 790 for doc in raw_documents: 791 feature_counter = {} --> 792 for feature in analyze(doc): 793 try: 794 feature_idx = vocabulary[feature]
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in (doc) 264 265 return lambda doc: self._word_ngrams( --> 266 tokenize(preprocess(self.decode(doc))), stop_words) 267 268 else:
C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in (x) 230 231 if self.lowercase: --> 232 return lambda x: strip_accents(x.lower()) 233 else: 234 return strip_accents
AttributeError: 'NoneType' object has no attribute 'lower'
代码如下:
def printNMostInformative(vectorizer, clf, N):
    """Print the N lowest- and N highest-weighted features of a linear model.

    Args:
        vectorizer: Fitted object exposing ``get_feature_names()``; the
            returned names are paired positionally with the coefficients.
        clf: Fitted object exposing ``coef_``; only the first row
            (``clf.coef_[0]``) is inspected.
        N: Number of features to print for each end of the ranking.

    Returns:
        None. The ``(coefficient, feature_name)`` tuples are written to
        standard output, one per line, under the two headings.
    """
    feature_names = vectorizer.get_feature_names()
    # Sort (coefficient, name) pairs ascending so both extremes of the
    # ranking can be taken with plain slices.
    coefs_with_fns = sorted(zip(clf.coef_[0], feature_names))
    # Most negative coefficients, in ascending order.
    topClass1 = coefs_with_fns[:N]
    # Most positive coefficients, walked backwards so the largest is first.
    topClass2 = coefs_with_fns[:-(N + 1):-1]
    print("Class 1 best: ")
    for feat in topClass1:
        print(feat)
    print("Class 2 best: ")
    for feat in topClass2:
        print(feat)
# Token-count features: tokens come from the tokenizeText callable (defined
# outside this snippet); ngram_range=(1,1) keeps single tokens only.
vectorizer = CountVectorizer(tokenizer=tokenizeText, ngram_range=(1,1))
# Linear support-vector classifier used as the final pipeline step.
clf = LinearSVC()
# Steps run in the listed order: text cleaning -> vectorizing -> classifying.
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer), ('clf', clf)])
# data: the 'Title' column supplies the documents and the 'Conference' column
# the labels; `train` and `test` are defined outside this snippet
# (presumably pandas DataFrames -- TODO confirm).
train1 = train['Title'].tolist()
labelsTrain1 = train['Conference'].tolist()
test1 = test['Title'].tolist()
labelsTest1 = test['Conference'].tolist()
# train
# NOTE(review): the traceback quoted above is raised from this call, with the
# vectorizer step receiving None instead of a string for a document. Check
# that the cleanText step returns the cleaned text and that no title is None.
pipe.fit(train1, labelsTrain1)
# test: predict on the test titles and report accuracy against their labels.
preds = pipe.predict(test1)
print("accuracy:", accuracy_score(labelsTest1, preds))
print("Top 10 features used to predict: ")
# Inspects the same vectorizer and clf objects that were handed to the
# pipeline above (presumably fitted in place by pipe.fit -- TODO confirm).
printNMostInformative(vectorizer, clf, 10)
# Rebuild the pipeline without the classifier so fit_transform returns the
# vectorizer's document-term matrix for the training titles.
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer)])
transform = pipe.fit_transform(train1, labelsTrain1)
# Feature names, indexed by column of the document-term matrix.
vocab = vectorizer.get_feature_names()
for i in range(len(train1)):
    # Assumes `transform` is a SciPy CSR matrix (it is read through
    # indptr/indices/data): row i's entries live in indptr[i]:indptr[i + 1].
    row_start, row_end = transform.indptr[i], transform.indptr[i + 1]
    indexIntoVocab = transform.indices[row_start:row_end]
    numOccurences = transform.data[row_start:row_end]
    # One "(token, count)" tuple repr per non-zero entry, concatenated.
    # NOTE(review): `s` is not consumed in the visible code -- presumably it
    # was printed per sample in the original script; TODO confirm.
    s = "".join(
        str((vocab[idx], num))
        for idx, num in zip(indexIntoVocab, numOccurences)
    )
看来这和 train1 的数据有关。我不知道该怎么解决这个问题。
这是在清洗数据之后,现在尝试使用此函数打印出最重要的特征,即系数最高的特征:
对于想了解更多信息的人——这段代码基于这个教程: https://towardsdatascience.com/machine-learning-for-text-classification-using-spacy-in-python-b276b4051a49 。我也遇到了同样的错误:
这与
cleanText()
函数有关:该函数没有返回任何内容供管道的后续步骤使用,因此传给向量化器的是 None,这正是回溯中 'NoneType' object has no attribute 'lower' 的来源。如果您在 cleanText() 函数的末尾添加
return text
,它应该可以修复您的错误。
相关问题 更多 >
编程相关推荐