y标签是否需要是支持向量机工作的特定类型？

import matplotlib.pyplot as plt from sklearn import svm, metrics from sklearn.model_selection import train_test_split import numpy as np import os # Working with files and folders from PIL import Image # Image processing rootdir = os.getcwd() image_file = 'images.npy' key_file = 'keys.npy' if (os.path.exists(image_file) and os.path.exists(key_file)): print "Loading existing numpy's" pixel_arr = np.load(image_file) key = np.load(key_file) else: print "Creating new numpy's" key_array = [] pixel_arr = np.empty((0,10000), "uint8") for subdir, dirs, files in os.walk('data'): dir_name = subdir.split("/")[-1] if "x" in dir_name: for file in files: if ".DS_Store" not in file: im = Image.open(os.path.join(subdir, file)) if im.size == (100,100): key_array.append(dir_name) numpied_image = np.array(im.convert('L')).reshape(1,-1) #Image.fromarray(np.reshape(numpied_image,(-1,100)), 'L').show() pixel_arr = np.append(pixel_arr, numpied_image, axis=0) im.close() key = np.array(key_array) np.save(image_file, pixel_arr) np.save(key_file, key) # Create a classifier: a support vector classifier classifier = svm.SVC(gamma='auto') X_train, X_test, y_train, y_test = train_test_split(pixel_arr, key, test_size=0.1,random_state=33) # We learn the digits on the first half of the digits print "Fitting classifier" classifier.fit(X_train, y_train) # Now predict the value of the digit on the second half: expected = y_test print "Predicting" predicted = classifier.predict(X_test) print("Classification report for classifier %s:\n%s\n" % (classifier, metrics.classification_report(expected, predicted))) print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False): precision recall f1-score support 0x0 0.00 0.00 0.00 9 1x0 0.00 0.00 0.00 9 1x1 0.00 0.00 0.00 12 2x0 0.00 0.00 0.00 12 2x1 0.00 0.00 0.00 10 2x2 0.00 0.00 0.00 7 3x0 0.00 0.00 0.00 7 3x1 0.00 0.00 0.00 8 3x2 0.00 0.00 0.00 8 3x3 0.01 1.00 0.02 3 4x0 0.00 0.00 0.00 11 4x1 0.00 0.00 0.00 10 4x2 0.00 0.00 0.00 8 4x3 0.00 0.00 0.00 15 4x4 0.00 0.00 0.00 8 5x0 0.00 0.00 0.00 12 5x1 0.00 0.00 0.00 7 5x2 0.00 0.00 0.00 11 5x3 0.00 0.00 0.00 7 5x4 0.00 0.00 0.00 9 5x5 0.00 0.00 0.00 14 6x0 0.00 0.00 0.00 11 6x1 0.00 0.00 0.00 12 6x2 0.00 0.00 0.00 11 6x3 0.00 0.00 0.00 9 6x4 0.00 0.00 0.00 9 6x5 0.00 0.00 0.00 18 6x6 0.00 0.00 0.00 13 avg / total 0.00 0.01 0.00 280 >>> print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted)) Confusion matrix: [[ 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

2条回答

网友

1楼 · 编辑于 2024-05-15 15:27:25

对于分类问题，您应该首先将目标向量（键）转换为数字类型，而不是直接使用类别/标签名称。如以下示例所示：

In [21]: iris=datasets.load_iris()

In [22]: X=iris.data

In [23]: y=iris.target

In [24]: y
Out[24]: 
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [25]: iris.target_names
Out[25]: array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

使用提到的LabelEncoder尝试以下代码：

from sklearn import preprocessing
le=preprocessing.LabelEncoder()
le.fit(key)
le.transform(key)
Out[36]: array([0, 1])

网友

2楼 · 编辑于 2024-05-15 15:27:25

虽然我还不能使它在脚本上工作，但我使用了手写图像数据集并应用了这个脚本，它给出了相同的结果。不过，它最终还是比其他伽马值的效果更好。也就是说，起初所有的结果都在中间，但随着gamma的变化，我得到了手写数字脚本，以获得很高的精度。我只能假设这是同样的问题，虽然我还没有找到一个伽玛值，使任何比5%的精度更好。你知道吗

但是，至于为什么会发生这种情况，我很肯定gamma值（或者SVC的任何其他参数）是关闭的。你知道吗

相关问题更多 >

编程相关推荐

热门问题

热门文章