<p>好吧,我终于把代码搞定了。请参阅下面的解决方案:</p>
<pre><code># Data Preprocessing
# Import Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Import Dataset
dataset = pd.read_csv('Data2.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 5].values
df_X = pd.DataFrame(X)
df_y = pd.DataFrame(y)
# Replace Missing Values
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0)
imputer = imputer.fit(X[:, 3:5 ])
X[:, 3:5] = imputer.transform(X[:, 3:5])
# Encoding Categorical Data "Name"
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_x = LabelEncoder()
X[:, 0] = labelencoder_x.fit_transform(X[:, 0])
# Encoding Categorical Data "University"
from sklearn.preprocessing import LabelEncoder
labelencoder_x1 = LabelEncoder()
X[:, 1] = labelencoder_x1.fit_transform(X[:, 1])
# Transform Name into a Matrix
onehotencoder1 = OneHotEncoder(categorical_features = [0])
X = onehotencoder1.fit_transform(X).toarray()
# Transform University into a Matrix
onehotencoder2 = OneHotEncoder(categorical_features = [6])
X = onehotencoder2.fit_transform(X).toarray()
</code></pre>