Python、机器学习和线性回归

# to get in-line plots %matplotlib inline import matplotlib.pyplot as plt import numpy as np import scipy as sp from scipy import stats # Load the data IDnumber = 0000001 np.random.seed(IDnumber) filename = "ccpp_Data_clean2018.csv" Data = np.genfromtxt(filename, delimiter=';',skip_header=1) dataDescription = stats.describe(Data) print(dataDescription) Data.shape #get number of total samples num_total_samples = Data.shape[0] print("Total number of samples: "+str(num_total_samples)) #size of each chunk of data for training, validation, testing size_chunk = int(num_total_samples/3.) print("Size of each chunk of data: "+str(size_chunk)) #shuffle the data np.random.shuffle(Data) #training data X_training = np.delete(Data[:size_chunk], 4, 1) Y_training = Data[:size_chunk, 4] print("Training data input size: "+str(X_training.shape)) print("Training data output size: "+str(Y_training.shape)) #validation data, to be used to choose among different models X_validation = np.delete(Data[size_chunk:size_chunk*2], 4, 1) Y_validation = Data[size_chunk:size_chunk*2, 4] print("Validation data input size: "+str(X_validation.shape)) print("Validation data ouput size: "+str(Y_validation.shape)) #test data, to be used to estimate the true loss of the final model(s) X_test = np.delete(Data[size_chunk*2:num_total_samples], 4, 1) Y_test = Data[size_chunk*2: num_total_samples, 4] print("Test data input size: "+str(X_test.shape)) print("Test data output size: "+str(Y_test.shape)) #scale the data # standardize the input matrix from sklearn import preprocessing scaler = preprocessing.StandardScaler().fit(X_training) X_training = scaler.transform(X_training) print("Mean of the training input data:"+str(X_training.mean(axis=0))) print("Std of the training input data:"+str(X_training.std(axis=0))) X_validation = scaler.transform(X_validation) # use the same transformation on validation data print("Mean of the validation input data:"+str(X_validation.mean(axis=0))) print("Std of the validation 
input data:"+str(X_validation.std(axis=0))) X_test = scaler.transform(X_test) # use the same transformation on test data print("Mean of the test input data:"+str(X_test.mean(axis=0))) print("Std of the test input data:"+str(X_test.std(axis=0))) #compute linear regression coefficients for training data #add a 1 at the beginning of each sample for training, validation, and testing m_training = # COMPLETE: NUMBER OF POINTS IN THE TRAINING SET X_training = np.hstack((np.ones((m_training,1)),X_training)) m_validation = # COMPLETE: NUMBER OF POINTS IN THE VALIDATION SET X_validation = np.hstack((np.ones((m_validation,1)),X_validation)) m_test = # COMPLETE: NUMBER OF POINTS IN THE TEST SET X_test = np.hstack((np.ones((m_test,1)),X_test)) # Compute the coefficients for linear regression (LR) using linalg.lstsq w_np, RSStr_np, rank_X_tr, sv_X_tr = #COMPLETE print("LR coefficients with numpy lstsq: "+ str(w_np)) # compute Residual sums of squares by hand print("RSS with numpy lstsq: "+str(RSStr_np)) print("Empirical risk with numpy lstsq:"+str(RSStr_np/m_training))

#compute predictions on training set, validation set, and test set prediction_training = # COMPLETE prediction_validation = # COMPLETE prediction_test = # COMPLETE #what about the RSS and loss for points in the validation data? RSS_validation =# COMPLETE RSS_test = # COMPLETE print("RSS on validation data: "+str(RSS_validation)) print("Loss estimated from validation data:"+str(RSS_validation/m_validation)) #another measure of how good our linear fit is given by the following (that is 1 - R^2) #compute 1 -R^2 for training, validation, and test set Rmeasure_training = #COMPLETE Rmeasure_validation = #COMPLETE Rmeasure_test = #COMPLETE

1条回答

网友
1楼 · 发布于 2024-04-19 08:10:49

你可以用
m_training=len(X_training)
但更好的做法是使用数组的 shape 属性：
X_training.shape
它将返回一个元组（m，n），其中m是行数，n是列数。那么
m_training = X_training.shape[0]
就是你要找的。实际上，要在数据矩阵的最前面添加一列 1，就必须知道行数：np.ones((m_training, 1)) 的第一个维度就是行数。
对于函数 linalg.lstsq，您可以查看以下文档中的示例： https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.linalg.lstsq.html
在您的情况下，应该是：
np.linalg.lstsq(X_training, Y_training)

相关问题更多 >

编程相关推荐

热门问题

热门文章