使用随机森林应用分层10折交叉验证

# Train a random forest classifier on 'finalDataset.csv', report its quality
# on a 10% hold-out split, and persist the model when accuracy exceeds 75%.

# Importing libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# `sklearn.externals.joblib` was removed from recent scikit-learn releases;
# prefer the standalone package and fall back only for legacy installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load the dataset; the first row of the CSV is used as the header, which is
# required because the 'DJ class' column is looked up by name below.
dataset = pd.read_csv('finalDataset.csv')

# Collect the distinct class labels in order of first appearance.
factor = pd.factorize(dataset['DJ class'])
definitions = factor[1]

# Split the data into independent (features) and dependent (target) variables.
X = dataset.iloc[:, 3:1941].values
y = dataset.iloc[:, 0].values

# Create the training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=30
)

# Feature scaling: fit on the training data only, then apply to the test data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the random forest classifier to the training set.
classifier = RandomForestClassifier(
    n_estimators=10, criterion='entropy', random_state=40
)
classifier.fit(X_train, y_train)

# Predict the test set once and reuse the result for every metric below.
y_pred = classifier.predict(X_test)

# Score on the raw target values, before they are mapped to display labels,
# so the save decision never compares labels against unmapped predictions.
accuracy_percent = accuracy_score(y_test, y_pred) * 100

# Map target codes back to class labels for reporting.
# NOTE(review): this assumes column 0 holds codes 1..N that follow the
# first-appearance order of 'DJ class' -- confirm against the dataset.
reversefactor = dict(enumerate(definitions, start=1))
y_test = np.vectorize(reversefactor.get)(y_test)
y_pred = np.vectorize(reversefactor.get)(y_pred)

# Confusion matrix as a labelled cross-tabulation.
print(pd.crosstab(y_test, y_pred, rownames=['Actual DJ'], colnames=['Predicted DJ']))

# Per-class precision / recall / F1; both arguments are in label space.
sk_report = classification_report(digits=6, y_true=y_test, y_pred=y_pred)
print(sk_report)

cm = confusion_matrix(y_test, y_pred)
print(cm)

# Save the model to disk only when it clears the accuracy threshold.
modelFilename = 'randomforestmodel.pkl'
if accuracy_percent > 75:
    joblib.dump(classifier, modelFilename)
    print("Saved model to disk")

0条回答

目前没有回答

相关问题更多 >

编程相关推荐

热门问题

热门文章