Python sklearn 报错 ValueError: setting an array element with a sequence(用序列设置数组元素)
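我在训练 sklearn 的 SGD 分类器(SGDClassifier):特征是 name 和 age 两列,其中每个值本身都是一个数组,标签是 color。但是在调用 .fit() 方法时出现了错误,错误信息是 “ValueError: setting an array element with a sequence.”(用序列设置数组元素)。这个错误是什么意思?这是不是说 sklearn 的 SGD 分类器不能接受“数组的数组”作为输入呢?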
不过,如果 name 和 age 的每个值不是数组,而是单个标量,就不会报错。
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
# Question's reproduction script: builds a tiny data set whose feature cells
# are arrays and then tries to fit an SGDClassifier on it.

# "name" feature: one 4-element integer array per sample.
a=np.array([0, 2, 5, 2])
b=np.array( [0, 5, 0, 2])
c=np.array([2,2,0,0])
d=np.array([5,2,5,0])
# "age" feature: likewise one 4-element integer array per sample.
age_a=np.array([5, 10, 7, 6])
age_b=np.array([3, 7, 11,8])
age_c=np.array([15, 10, 17, 2])
age_d=np.array([2, 8, 12,7])
# Array-valued labels; only referenced by the commented-out variant below.
color_a=np.array([0,2,1,1])
color_b=np.array([1,12,0,1])
color_c=np.array([0,1,1,0])
color_d=np.array([1,0,0,1])
#data2={'name':[a,b,c,d],'age':[age_a, age_b, age_c, age_d],'color': [color_a,color_b,color_c,color_d]}
# Active variant: each 'name'/'age' cell holds a whole array, while 'color'
# is one scalar class label per sample.
data2={'name':[a,b,c,d],'age':[age_a, age_b, age_c, age_d],'color':[0,1,0,1]}
new2 = pd.DataFrame.from_dict(data2)
print(new2)
# Features are every column except 'color'; the target is the 'color' column.
x = new2.loc[:, new2.columns != 'color']
y = new2.loc[:, 'color']
# dtype=object keeps the nested arrays as objects, so x is a 4x2 object array
# whose cells are arrays rather than a numeric feature matrix.
x=np.array(x,dtype=object)
y=np.array(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
from sklearn.linear_model import SGDClassifier
sgd_clf=SGDClassifier(random_state=42)
# fit() is where the reported error surfaces: the array-valued cells cannot be
# converted to floats ("setting an array element with a sequence").
sgd_clf.fit(x_train, y_train)
sgd_clf.predict(x_test)
TypeError Traceback (most recent call last) TypeError: float() argument must be a string or a real number, not 'list'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[117], line 25
21 print(y_test)
24 clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
25 clf.fit(x_train, y_train)
26 #SGDClassifier(max_iter=100)
28 clf.predict([[2., 2.]])
ValueError: setting an array element with a sequence.
1 个回答
0
sklearn 要求输入 X 是二维的数值数组(每行一个样本,每列一个标量特征),而不是“数组的数组”。你可以把每一行的 name 和 age 数组拼接成一个一维的特征向量,这样 X 就变成形状为 (4, 8) 的普通数值数组:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
# Toy data set: four samples, each described by two 4-element integer arrays.
# "name" feature arrays, one per sample.
a = np.array([0, 2, 5, 2])
b = np.array([0, 5, 0, 2])
c = np.array([2, 2, 0, 0])
d = np.array([5, 2, 5, 0])
# "age" feature arrays, one per sample.
age_a = np.array([5, 10, 7, 6])
age_b = np.array([3, 7, 11, 8])
age_c = np.array([15, 10, 17, 2])
age_d = np.array([2, 8, 12, 7])
# Array-valued labels; not used below, kept so the snippet defines the same
# data as the question.
color_a = np.array([0, 2, 1, 1])
color_b = np.array([1, 12, 0, 1])
color_c = np.array([0, 1, 1, 0])
color_d = np.array([1, 0, 0, 1])
# Each 'name'/'age' cell holds a whole array; 'color' is a scalar class label.
data2 = {'name': [a, b, c, d], 'age': [age_a, age_b, age_c, age_d], 'color': [0, 1, 0, 1]}
new2 = pd.DataFrame.from_dict(data2)
print(new2)
# Features are every column except 'color'; the target is the 'color' column.
x = new2.loc[:, new2.columns != 'color']
y = new2.loc[:, 'color']
# Turn each array-valued column into an (n_samples, 4) matrix and join the
# matrices side by side, so every sample becomes one flat numeric feature
# vector (name values first, then age values). Working column-wise avoids a
# slow row-by-row DataFrame.iterrows() loop and handles any number of
# array-valued feature columns.
x_flattened = np.hstack([np.array(x[col].tolist()) for col in x.columns])
y = np.array(y)
from sklearn.linear_model import SGDClassifier

# Hold out 25% of the flattened samples for testing; the fixed random_state
# makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x_flattened,
    y,
    test_size=0.25,
    random_state=42,
)
# fit() returns the estimator itself, so construction and training can be
# chained; the flat numeric rows are what lets fit() succeed here.
sgd_clf = SGDClassifier(random_state=42).fit(x_train, y_train)
# Predict the class label of the held-out sample(s).
sgd_clf.predict(x_test)
上面代码中的 print(new2) 会输出如下内容;如果这就是你想要的数据,那么结果看起来是对的:
name age color
0 [0, 2, 5, 2] [5, 10, 7, 6] 0
1 [0, 5, 0, 2] [3, 7, 11, 8] 1
2 [2, 2, 0, 0] [15, 10, 17, 2] 0
3 [5, 2, 5, 0] [2, 8, 12, 7] 1