我正在做一个程序,拟合一个分段线性回归,数据中最多有4-5个断点,然后决定有多少断点是防止过度拟合和欠拟合的最佳方法。然而,我的代码运行起来非常慢,这是由于它是多么的笨拙。你知道吗
我的代码草稿是:
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit, differential_evolution
import matplotlib.pyplot as plt
import warnings
def segmentedRegression_two(xData,yData):
def func(xVals,break1,break2,slope1,offset1,slope_mid,offset_mid,slope2,offset2):
returnArray=[]
for x in xVals:
if x < break1:
returnArray.append(slope1 * x + offset1)
elif (np.logical_and(x >= break1,x<break2)):
returnArray.append(slope_mid * x + offset_mid)
else:
returnArray.append(slope2 * x + offset2)
return returnArray
def sumSquaredError(parametersTuple): #Definition of an error function to minimize
model_y=func(xData,*parametersTuple)
warnings.filterwarnings("ignore") # Ignore warnings by genetic algorithm
return np.sum((yData-model_y)**2.0)
def generate_genetic_Parameters():
initial_parameters=[]
x_max=np.max(xData)
x_min=np.min(xData)
y_max=np.max(yData)
y_min=np.min(yData)
slope=10*(y_max-y_min)/(x_max-x_min)
initial_parameters.append([x_max,x_min]) #Bounds for model break point
initial_parameters.append([x_max,x_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([-y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([-y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([y_max,y_min])
result=differential_evolution(sumSquaredError,initial_parameters,seed=3)
return result.x
geneticParameters = generate_genetic_Parameters() #Generates genetic parameters
fittedParameters, pcov= curve_fit(func, xData, yData, geneticParameters) #Fits the data
print('Parameters:', fittedParameters)
model=func(xData,*fittedParameters)
absError = model - yData
SE = np.square(absError)
MSE = np.mean(SE)
RMSE = np.sqrt(MSE)
Rsquared = 1.0 - (np.var(absError) / np.var(yData))
return Rsquared
def segmentedRegression_three(xData,yData):
def func(xVals,break1,break2,break3,slope1,offset1,slope2,offset2,slope3,offset3,slope4,offset4):
returnArray=[]
for x in xVals:
if x < break1:
returnArray.append(slope1 * x + offset1)
elif (np.logical_and(x >= break1,x<break2)):
returnArray.append(slope2 * x + offset2)
elif (np.logical_and(x >= break2,x<break3)):
returnArray.append(slope3 * x + offset3)
else:
returnArray.append(slope4 * x + offset4)
return returnArray
def sumSquaredError(parametersTuple): #Definition of an error function to minimize
model_y=func(xData,*parametersTuple)
warnings.filterwarnings("ignore") # Ignore warnings by genetic algorithm
return np.sum((yData-model_y)**2.0)
def generate_genetic_Parameters():
initial_parameters=[]
x_max=np.max(xData)
x_min=np.min(xData)
y_max=np.max(yData)
y_min=np.min(yData)
slope=10*(y_max-y_min)/(x_max-x_min)
initial_parameters.append([x_max,x_min]) #Bounds for model break point
initial_parameters.append([x_max,x_min])
initial_parameters.append([x_max,x_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([-y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([-y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([y_max,y_min])
result=differential_evolution(sumSquaredError,initial_parameters,seed=3)
return result.x
geneticParameters = generate_genetic_Parameters() #Generates genetic parameters
fittedParameters, pcov= curve_fit(func, xData, yData, geneticParameters) #Fits the data
print('Parameters:', fittedParameters)
model=func(xData,*fittedParameters)
absError = model - yData
SE = np.square(absError)
MSE = np.mean(SE)
RMSE = np.sqrt(MSE)
Rsquared = 1.0 - (np.var(absError) / np.var(yData))
return Rsquared
def segmentedRegression_four(xData,yData):
def func(xVals,break1,break2,break3,break4,slope1,offset1,slope2,offset2,slope3,offset3,slope4,offset4,slope5,offset5):
returnArray=[]
for x in xVals:
if x < break1:
returnArray.append(slope1 * x + offset1)
elif (np.logical_and(x >= break1,x<break2)):
returnArray.append(slope2 * x + offset2)
elif (np.logical_and(x >= break2,x<break3)):
returnArray.append(slope3 * x + offset3)
elif (np.logical_and(x >= break3,x<break4)):
returnArray.append(slope4 * x + offset4)
else:
returnArray.append(slope5 * x + offset5)
return returnArray
def sumSquaredError(parametersTuple): #Definition of an error function to minimize
model_y=func(xData,*parametersTuple)
warnings.filterwarnings("ignore") # Ignore warnings by genetic algorithm
return np.sum((yData-model_y)**2.0)
def generate_genetic_Parameters():
initial_parameters=[]
x_max=np.max(xData)
x_min=np.min(xData)
y_max=np.max(yData)
y_min=np.min(yData)
slope=10*(y_max-y_min)/(x_max-x_min)
initial_parameters.append([x_max,x_min]) #Bounds for model break point
initial_parameters.append([x_max,x_min])
initial_parameters.append([x_max,x_min])
initial_parameters.append([x_max,x_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([-y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([-y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([y_max,y_min])
initial_parameters.append([-slope,slope])
initial_parameters.append([y_max,y_min])
result=differential_evolution(sumSquaredError,initial_parameters,seed=3)
return result.x
geneticParameters = generate_genetic_Parameters() #Generates genetic parameters
fittedParameters, pcov= curve_fit(func, xData, yData, geneticParameters) #Fits the data
print('Parameters:', fittedParameters)
model=func(xData,*fittedParameters)
absError = model - yData
SE = np.square(absError)
MSE = np.mean(SE)
RMSE = np.sqrt(MSE)
Rsquared = 1.0 - (np.var(absError) / np.var(yData))
return Rsquared
从这里开始,到目前为止,我的想法是这样的:
r2s=[segmentedRegression_two(xData,yData),segmentedRegression_three(xData,yData),segmentedRegression_four(xData,yData)]
best_fit=np.max(r2s)
尽管我可能需要使用AIC或其他什么。你知道吗
有什么方法可以让我的跑步更有效率吗?你知道吗
我抓起你的一个
func
,把它放在一个测试脚本中:然后我开始了“矢量化”的第一步,用值块来评估函数,而不是逐个元素。你知道吗
allclose
测试打印True
。我还ran
将其放入ipython
,并对两个版本进行计时。你知道吗我还做了
plt.plot(xVals, ret)
以查看函数的简单绘图。你知道吗我写
func1
的目的是让它适用于你的3个案例。它不存在,但根据输入列表(或数组)的长度进行更改应该不难。你知道吗我相信可以做更多的事情,但这应该从正确的方向开始。你知道吗
还有一个
numpy
piecewise
计算器:但我认为构建这两个输入列表将是同样多的工作。你知道吗
相关问题 更多 >
编程相关推荐