import pandas as pd
import numpy as np
from sklearn import datasets, linear_model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
X2 = sm.add_constant(X)
est = sm.OLS(y, X2)
est2 = est.fit()
print(est2.summary())
from sklearn import linear_model
from scipy import stats
import numpy as np
class LinearRegression(linear_model.LinearRegression):
"""
LinearRegression class after sklearn's, but calculate t-statistics
and p-values for model coefficients (betas).
Additional attributes available after .fit()
are `t` and `p` which are of the shape (y.shape[1], X.shape[1])
which is (n_features, n_coefs)
This class sets the intercept to 0 by default, since usually we include it
in X.
"""
def __init__(self, *args, **kwargs):
if not "fit_intercept" in kwargs:
kwargs['fit_intercept'] = False
super(LinearRegression, self)\
.__init__(*args, **kwargs)
def fit(self, X, y, n_jobs=1):
self = super(LinearRegression, self).fit(X, y, n_jobs)
sse = np.sum((self.predict(X) - y) ** 2, axis=0) / float(X.shape[0] - X.shape[1])
se = np.array([
np.sqrt(np.diagonal(sse[i] * np.linalg.inv(np.dot(X.T, X))))
for i in range(sse.shape[0])
])
self.t = self.coef_ / se
self.p = 2 * (1 - stats.t.cdf(np.abs(self.t), y.shape[0] - X.shape[1]))
return self
编辑:可能不是正确的方法,请参见注释
您可以使用sklearn.feature_selection.f_regression。
click here for the scikit-learn page
这有点过分了,但我们试试看。首先让我们使用statsmodel找出p值应该是什么
我们得到
好吧,让我们复制一下。这有点过头了,因为我们几乎是在用矩阵代数再现线性回归分析。但这到底是怎么回事。
这给了我们。
所以我们可以从statsmodel中复制值。
scikit learn的LinearRegression不计算此信息,但您可以轻松地扩展类以执行此操作:
从here偷来的。
对于Python中的这种统计分析,您应该看看statsmodels。
相关问题 更多 >
编程相关推荐