Scikit-learn分类测试中出现ValueError: 使用序列设置数组元素
我在使用关于多类adaboost的教程,想要对一些有两个类别的图片进行分类(不过我觉得这个算法应该也能处理二分类问题)。接下来我会把样本扩展到其他类别。
目前我的测试样本很小,总共只有17张图片,其中10张用于训练,7张用于测试。
现在我有两个类别:0: 没有车辆,1: 有车辆。
我使用整数标签是因为根据上面链接中的示例,训练数据是基于整数标签的。
我对提供的示例做了一点修改,加入了我自己的图片文件,但出现了错误。
Traceback (most recent call last):
File "C:\Users\app\Documents\Python Scripts\carclassify.py", line 66, in <module>
bdt_discrete.fit(X_train, y_train)
File "C:\Users\app\Anaconda\lib\site-packages\sklearn\ensemble\weight_boosting.py", line 389, in fit
return super(AdaBoostClassifier, self).fit(X, y, sample_weight)
File "C:\Users\app\Anaconda\lib\site-packages\sklearn\ensemble\weight_boosting.py", line 99, in fit
X = np.ascontiguousarray(array2d(X), dtype=DTYPE)
File "C:\Users\app\Anaconda\lib\site-packages\numpy\core\numeric.py", line 408, in ascontiguousarray
return array(a, dtype, copy=False, order='C', ndmin=1)
ValueError: setting an array element with a sequence.
以下是我根据scikit-learn网站上的示例修改的代码:
# Load the sample file names (one per line), compute a HOG descriptor for
# each image, and split descriptors/labels into a training and a test part.
# NOTE(review): the handle f is never closed in this snippet.
f = open("PATH_TO_SAMPLES\\samples.txt",'r')
out = f.read().splitlines()
import numpy as np
imgs = []
tmp_hogs = []
# 13 of the images are with vehicles, 4 are without
labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
# NOTE(review): the loop body (everything through tmp_hogs.append) has lost
# its indentation in this paste.
for file in out:
filepath = "C:\PATH_TO_SAMPLE_IMAGES\\" + file
curr_img = color.rgb2gray(io.imread(filepath))
imgs.append(resize(curr_img,(60,40)))
# NOTE(review): the descriptor is computed from curr_img, not from the
# resized copy stored in imgs. If the source images differ in size the
# descriptors presumably differ in length, which would make np.array()
# below produce an array of arrays instead of a 2-D matrix -- confirm.
fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
cells_per_block=(1, 1), visualise=True)
tmp_hogs.append(fd)
img_hogs = np.array(tmp_hogs)
# The first n_split samples are used for training, the rest for testing.
n_split = 10
# NOTE(review): X is not defined anywhere in this snippet; img_hogs was
# probably intended -- confirm.
X_train, X_test = img_hogs[:n_split], X[n_split:] # all first ten images with vehicles
y_train, y_test = labels[:n_split], labels[n_split:] # 3 images with vehicles, 4 without
#now all the code below is straight off the example on scikit-learn's website
# Build two boosted ensembles over depth-2 decision trees with 600
# estimators each. bdt_real leaves `algorithm` unset; bdt_discrete
# explicitly selects "SAMME" and uses a learning rate of 1.5 instead of 1.
bdt_real = AdaBoostClassifier(
DecisionTreeClassifier(max_depth=2),
n_estimators=600,
learning_rate=1)
bdt_discrete = AdaBoostClassifier(
DecisionTreeClassifier(max_depth=2),
n_estimators=600,
learning_rate=1.5,
algorithm="SAMME")
# Fit both ensembles on the same training split.
bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)
# Record the test error (1 - accuracy) after every boosting stage for both
# ensembles by walking their staged predictions on X_test in lockstep.
real_test_errors = []
discrete_test_errors = []
# NOTE: despite its name, discrete_train_predict holds predictions on X_test.
# NOTE(review): the loop body has lost its indentation in this paste.
for real_test_predict, discrete_train_predict in zip(
bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
real_test_errors.append(
1. - accuracy_score(real_test_predict, y_test))
discrete_test_errors.append(
1. - accuracy_score(discrete_train_predict, y_test))
# Plot three side-by-side panels: per-stage test error, per-estimator error,
# and per-estimator weight.
# x-axis values: 1 .. len(bdt_discrete).
n_trees = xrange(1, len(bdt_discrete) + 1)
pl.figure(figsize=(15, 5))
# Left panel: test error after each boosting stage for both ensembles.
pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
linestyle='dashed', label='SAMME.R')
pl.legend()
# NOTE(review): fixed y-range, presumably carried over from the tutorial;
# it may not fit the errors measured on this data set -- verify.
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')
# Middle panel: estimator_errors_ of both ensembles.
pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
# Upper y-limit is 1.2 times the largest estimator error of either ensemble.
pl.ylim((.2,
max(bdt_real.estimator_errors_.max(),
bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))
# Right panel: estimator_weights_ of bdt_discrete only.
pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))
# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()
编辑
我输入了
print tmp_hogs
输出是:
[ array([ 0.27621208, 0.11038658, 0.10698133, ..., 0.08661556, 0.04612063, 0.0280782 ]),
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -1.29909838e-15, -7.01780982e-17, -1.24900943e-15]),
array([ 0.0503603 , 0.1497235 , 0.2372957 , ..., 0.07249325, 0.04545541, 0.00903818]),
array([ 0.27299191, 0.13122109, 0.0719268 , ..., 0.0848522 , 0.04789403, 0.01387038]),
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., 3.32140617e-17, -6.58924128e-17, -6.23567224e-16]),
array([ 0.37431874, 0.18094303, 0.01219871, ..., 0.06501856, 0.04855516, 0.02439321]),
array([ 0.41087302, 0.16478851, 0.03396399, ..., 0.09511273, 0.04077713, 0.03945513]),
array([ 0.17753915, 0.07025565, 0.09136909, ..., 0.03396507, 0.01379266, 0.01645722]),
array([ 0.40605587, 0.05915388, 0.03767763, ..., 0.08981079, 0.05452031, 0.01725399]),
array([ 0. , 0. , 0. , ..., 0.00579303, 0.02053979, 0.0019091 ]),
array([ 0.31550735, 0.11988131, 0.07716529, ..., 0.09815158, 0.03058497, 0.02236517]),
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -3.51175682e-16, 1.31619418e-03, 2.86127901e-16]),
array([ 0.21381704, 0.22352378, 0.11568828, ..., 0.06311083, 0.02696666, 0.00402261]),
array([ 0.17480064, 0.1469145 , 0.16336016, ..., 0.05614001, 0.03244093, 0.00524034]),
array([ 0. , 0. , 0. , ..., 0.03089959, 0.00509584, 0.00247698]),
array([ 0.04711166, 0.0218663 , 0.05316 , ..., 0.04214594, 0.04892439, 0.25840958]),
array([ 0.05357464, 0.00530857, 0.07162301, ..., 0.06802692, 0.08331959, 0.26619977])]
然后我运行了
print img_hogs
输出是:
[ array([ 0.27621208, 0.11038658, 0.10698133, ..., 0.08661556, 0.04612063, 0.0280782 ])
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -1.29909838e-15, -7.01780982e-17, -1.24900943e-15])
array([ 0.0503603 , 0.1497235 , 0.2372957 , ..., 0.07249325, 0.04545541, 0.00903818])
array([ 0.27299191, 0.13122109, 0.0719268 , ..., 0.0848522 , 0.04789403, 0.01387038])
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., 3.32140617e-17, -6.58924128e-17, -6.23567224e-16])
array([ 0.37431874, 0.18094303, 0.01219871, ..., 0.06501856, 0.04855516, 0.02439321])
array([ 0.41087302, 0.16478851, 0.03396399, ..., 0.09511273, 0.04077713, 0.03945513])
array([ 0.17753915, 0.07025565, 0.09136909, ..., 0.03396507, 0.01379266, 0.01645722])
array([ 0.40605587, 0.05915388, 0.03767763, ..., 0.08981079, 0.05452031, 0.01725399])
array([ 0. , 0. , 0. , ..., 0.00579303, 0.02053979, 0.0019091 ])
array([ 0.31550735, 0.11988131, 0.07716529, ..., 0.09815158, 0.03058497, 0.02236517])
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -3.51175682e-16, 1.31619418e-03, 2.86127901e-16])
array([ 0.21381704, 0.22352378, 0.11568828, ..., 0.06311083, 0.02696666, 0.00402261])
array([ 0.17480064, 0.1469145 , 0.16336016, ..., 0.05614001, 0.03244093, 0.00524034])
array([ 0. , 0. , 0. , ..., 0.03089959, 0.00509584, 0.00247698])
array([ 0.04711166, 0.0218663 , 0.05316 , ..., 0.04214594, 0.04892439, 0.25840958])
array([ 0.05357464, 0.00530857, 0.07162301, ..., 0.06802692, 0.08331959, 0.26619977])]
1 个回答
1
尝试一下:
# Build a 2-D feature matrix with one fixed-length HOG descriptor per image.
#
# Reads the file names in `out` and produces:
#   imgs     -- list of the grayscale images resized to 60x40
#   tmp_hogs -- 2-D float array, one HOG descriptor per row
#   labels   -- integer class label per image (1 = vehicle, 0 = no vehicle)
#   img_hogs -- same array as tmp_hogs; the matrix handed to the classifier
imgs = []
# Allocated inside the loop once the descriptor length is known, instead of
# hard-coding the number of images and the descriptor size.
tmp_hogs = None
# 13 of the images are with vehicles, 4 are without
labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
for i, file in enumerate(out):
    filepath = "C:\\PATH_TO_SAMPLE_IMAGES\\" + file
    curr_img = color.rgb2gray(io.imread(filepath))
    small_img = resize(curr_img, (60, 40))
    imgs.append(small_img)
    # Compute HOG on the resized image, not on curr_img: the descriptor
    # length depends on the image size, so descriptors taken from images of
    # different sizes cannot be stacked into one rectangular array (that is
    # what triggers "setting an array element with a sequence").
    fd, hog_image = hog(small_img, orientations=8, pixels_per_cell=(16, 16),
                        cells_per_block=(1, 1), visualise=True)
    if tmp_hogs is None:
        tmp_hogs = np.zeros((len(out), fd.size))
    tmp_hogs[i, :] = fd
if tmp_hogs is None:
    # No input files: keep tmp_hogs a (0, 0) array rather than None.
    tmp_hogs = np.zeros((0, 0))
img_hogs = tmp_hogs