使用 sklearn t-SNE 绘图时出现 IndexError 错误

# Script from the question, with its line breaks restored (the page had
# collapsed it onto a single line, which is not valid Python).
# The plotting loop at the bottom is the code that raises the IndexError being
# asked about; it is intentionally left as posted.
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

#Step 1 - Download the data
dataframe_all = pd.read_csv('https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv')
num_rows = dataframe_all.shape[0]

#Step 2 - Clean the data
#count the number of missing elements (NaN) in each column
counter_nan = dataframe_all.isnull().sum()
counter_without_nan = counter_nan[counter_nan==0]
#remove the columns with missing elements
dataframe_all = dataframe_all[counter_without_nan.keys()]
#remove the first 7 columns which contain no discriminative information
# NOTE: DataFrame.ix was removed in pandas 1.0; on current pandas use .iloc here.
dataframe_all = dataframe_all.ix[:,7:]

#Step 3: Create feature vectors
x = dataframe_all.ix[:,:-1].values
standard_scalar = StandardScaler()
x_std = standard_scalar.fit_transform(x)

# t distributed stochastic neighbour embedding (t-SNE) visualization
tsne = TSNE(n_components=2, random_state = 0)
x_test_2d = tsne.fit_transform(x_std)

#scatter plot the sample points among 5 classes
markers=('s','d','o','^','v')
color_map = {0:'red', 1:'blue', 2:'lightgreen', 3:'purple', 4:'cyan'}
plt.figure()
# BUG (the subject of this question): np.unique(x_test_2d) yields the distinct
# float coordinates of the embedding, not class labels, so `cl` is a float and
# x_test_2d[cl, 0] raises IndexError.
for idx, cl in enumerate(np.unique(x_test_2d)):
    plt.scatter(x=x_test_2d[cl, 0],y =x_test_2d[cl, 1], c=color_map[idx], marker=markers[idx], label=cl)
plt.show()

1条回答

网友

1楼 · 发布于 2024-04-24 20:32:15

错误是由于以下行引起的：

# Offending call: `cl` is taken from np.unique(x_test_2d), i.e. it is a float
# coordinate value rather than an integer row index, so x_test_2d[cl, 0] fails.
plt.scatter(x_test_2d[cl, 0], x_test_2d[cl, 1], c=color_map[idx], marker=markers[idx])

这里的 cl 来自 np.unique(x_test_2d)，也就是 t-SNE 嵌入坐标中各个不同的取值，它们是浮点数，而不是整数行索引。例如 cl 取到的最后一个值是 99.46295，此时 x_test_2d[cl, 0] 就变成了 x_test_2d[99.46295, 0]；浮点数不能用作数组索引，因此抛出 IndexError。

定义一个变量y，保存类标签，然后使用：

# Class labels come from the `classe` column; they decide each point's colour.
class_letters = dataframe_all["classe"].values
# Turn every single-character label into its code point so that matplotlib
# receives numeric colour values.
y = np.array(list(map(ord, class_letters)))

# Scatter plot of the 2-D embedding, coloured by class
plt.figure()
embedding_x, embedding_y = x_test_2d[:, 0], x_test_2d[:, 1]
plt.scatter(embedding_x, embedding_y, c=y)
plt.show()

完整代码：

import pandas as pd
import numpy as np
import sklearn 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

# Step 1 - Download the data
dataframe_all = pd.read_csv('https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv')
num_rows = dataframe_all.shape[0]  # not used below; kept from the original script

# Step 2 - Clean the data
# Count the number of missing elements (NaN) in each column
counter_nan = dataframe_all.isnull().sum()
counter_without_nan = counter_nan[counter_nan == 0]
# Keep only the columns that have no missing elements
dataframe_all = dataframe_all[counter_without_nan.keys()]
# Drop the first 7 columns, which contain no discriminative information.
# .iloc (purely positional) replaces DataFrame.ix, which was removed in pandas 1.0.
dataframe_all = dataframe_all.iloc[:, 7:]

# Step 3 - Create feature vectors: every column except the last one
x = dataframe_all.iloc[:, :-1].values
standard_scalar = StandardScaler()
x_std = standard_scalar.fit_transform(x)

# t-distributed stochastic neighbour embedding (t-SNE) visualization
tsne = TSNE(n_components=2, random_state=0)
x_test_2d = tsne.fit_transform(x_std)

# Class labels from the `classe` column; needed for the colours
y = dataframe_all["classe"].values
# Convert each single-character label to its code point so that matplotlib
# receives numeric colour values
y = np.array([ord(label) for label in y])

# Scatter plot of all sample points, coloured by class. Rows are selected with
# slices rather than with values taken from the embedding itself, which is
# what caused the IndexError.
plt.figure()
plt.scatter(x_test_2d[:, 0], x_test_2d[:, 1], c=y)
plt.show()

相关问题更多 >

编程相关推荐

热门问题

热门文章