AttributeError:“NoneType”对象没有属性“lower”(在 Python 中使用 spaCy 时)

2024-04-26 00:57:37 发布

您现在位置:Python中文网/ 问答频道 /正文

以下是完整的错误消息:

AttributeError Traceback (most recent call last) in () 24 25 # train ---> 26 pipe.fit(train1, labelsTrain1) 27 28 # test

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in fit(self, X, y, **fit_params) 246 This estimator 247 """ --> 248 Xt, fit_params = self._fit(X, y, **fit_params) 249 if self._final_estimator is not None: 250 self._final_estimator.fit(Xt, y, **fit_params)

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in _fit(self, X, y, **fit_params) 211 Xt, fitted_transformer = fit_transform_one_cached( 212 cloned_transformer, None, Xt, y, --> 213 **fit_params_steps[name]) 214 # Replace the transformer of the step with the fitted 215 # transformer. This is necessary when loading the transformer

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\externals\joblib\memory.pyc in __call__(self, *args, **kwargs) 360 361 def __call__(self, *args, **kwargs): --> 362 return self.func(*args, **kwargs) 363 364 def call_and_shelve(self, *args, **kwargs):

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in _fit_transform_one(transformer, weight, X, y, **fit_params) 579 **fit_params): 580 if hasattr(transformer, 'fit_transform'): --> 581 res = transformer.fit_transform(X, y, **fit_params) 582 else: 583 res = transformer.fit(X, y, **fit_params).transform(X)

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y) 867 868 vocabulary, X = self._count_vocab(raw_documents, --> 869 self.fixed_vocabulary_) 870 871 if self.binary:

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab) 790 for doc in raw_documents: 791 feature_counter = {} --> 792 for feature in analyze(doc): 793 try: 794 feature_idx = vocabulary[feature]

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(doc) 264 265 return lambda doc: self._word_ngrams( --> 266 tokenize(preprocess(self.decode(doc))), stop_words) 267 268 else:

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(x) 230 231 if self.lowercase: --> 232 return lambda x: strip_accents(x.lower()) 233 else: 234 return strip_accents

AttributeError: 'NoneType' object has no attribute 'lower'

代码如下:

def printNMostInformative(vectorizer, clf, N):
    """Print the N lowest- and N highest-weighted features of a fitted model.

    Each feature name from ``vectorizer.get_feature_names()`` is paired with
    the matching coefficient in ``clf.coef_[0]``; the pairs are sorted in
    ascending order and both ends of the ranking are printed as
    ``(coefficient, feature_name)`` tuples.

    Args:
        vectorizer: Object exposing ``get_feature_names()``.
        clf: Object exposing ``coef_``; only its first row is used.
        N: Number of entries to print from each end of the ranking.
    """
    ranked = sorted(zip(clf.coef_[0], vectorizer.get_feature_names()))
    sections = (
        ("Class 1 best: ", ranked[:N]),        # smallest coefficients first
        ("Class 2 best: ", ranked[::-1][:N]),  # largest coefficients first
    )
    for header, entries in sections:
        print(header)
        for entry in entries:
            print(entry)

# Unigram bag-of-words vectorizer that tokenizes with tokenizeText, plus a
# linear SVM classifier.
vectorizer = CountVectorizer(tokenizer=tokenizeText, ngram_range=(1,1))
clf = LinearSVC()

# Three-step pipeline: clean the raw text, vectorize it, then classify.
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer), ('clf', clf)])

# data
# The 'Title' column supplies the documents and 'Conference' the labels.
train1 = train['Title'].tolist()
labelsTrain1 = train['Conference'].tolist()

test1 = test['Title'].tolist()
labelsTest1 = test['Conference'].tolist()

# train
# NOTE(review): the reported AttributeError ends in x.lower() on None inside
# the vectorizer's preprocessor, so some document reaching the vectorizer is
# None -- presumably produced by the cleaning step; verify.
pipe.fit(train1, labelsTrain1)

# test
preds = pipe.predict(test1)
print("accuracy:", accuracy_score(labelsTest1, preds))
print("Top 10 features used to predict: ")

printNMostInformative(vectorizer, clf, 10)
# Rebind pipe to a pipeline without the classifier and refit it, so that
# `transform` holds the vectorized training documents.
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer)])
transform = pipe.fit_transform(train1, labelsTrain1)

vocab = vectorizer.get_feature_names()
# Walk the matrix row by row through its indices/indptr/data arrays
# (CSR-style layout): for row i, the slice indptr[i]:indptr[i+1] selects that
# row's vocabulary indices and the matching counts.
for i in range(len(train1)):
    s = ""
    indexIntoVocab = transform.indices[transform.indptr[i]:transform.indptr[i+1]]
    numOccurences = transform.data[transform.indptr[i]:transform.indptr[i+1]]
    # Concatenate "(term, count)" pairs for this document.
    # NOTE(review): s is rebuilt for every row but never printed or stored
    # in this snippet.
    for idx, num in zip(indexIntoVocab, numOccurences):
        s += str((vocab[idx], num))

看来这和 train1 的数据有关。不知道怎么解决这个问题。

这是在清理数据之后,现在尝试使用此函数打印出最重要的特征,即系数最高的特征:


Tags: in, self, local, transform, params, users, appdata, feature
1条回答
网友
1楼 · 发布于 2024-04-26 00:57:37

对于那些寻找更多信息的人-这是基于一个教程 https://towardsdatascience.com/machine-learning-for-text-classification-using-spacy-in-python-b276b4051a49。我也得到了同样的错误:

这与 cleanText() 函数有关:该函数没有返回任何内容给管道使用,所以返回值是 None,因此回溯中出现了 NoneType 对象的错误。

def cleanText(text):
    """Return *text* stripped, with line breaks turned into spaces, lowercased.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string: leading/trailing whitespace removed, every
        ``"\\n"`` and ``"\\r"`` replaced by a single space, and all
        characters lowercased.
    """
    text = text.strip().replace("\n", " ").replace("\r", " ")
    text = text.lower()
    # Without this return the function yields None, and the downstream
    # CountVectorizer preprocessor then fails with
    # "AttributeError: 'NoneType' object has no attribute 'lower'".
    return text

如果您添加return text,它应该可以修复您的错误

^{pr2}$

相关问题 更多 >

    热门问题