从教程更改Python中的预测范围

2024-04-16 17:41:39 发布

您现在位置:Python中文网/ 问答频道 /正文

我可能是在这里跳进了Python的最深处,但是我从头到尾都遵循了一个tutorial,它工作得很好,我一般(我认为)理解。你知道吗

我试图用自己的数据重现我学到的东西。你知道吗

我已经得到了这项工作以及然而教程显示了预测线的绘图图生成的实际数据。你知道吗

我需要做什么改变才能预测,比如说提前28天,而不是在我已有的数据之上?你知道吗

这是我的代码(我说我的…主要来自教程!)你知道吗

from connectionstring import conn 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pyoff
import plotly.graph_objs as go
#import Keras
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.layers import LSTM
from sklearn.model_selection import KFold, cross_val_score, train_test_split

params = ("20180801","20191002")
sqlString = "SELECT ODCDAT AS DATE, SUM(ODORDQ) AS DROPIN FROM mytable.mydb WHERE ODCDAT BETWEEN %s AND %s GROUP BY ODCDAT ORDER BY ODCDAT"
command = (sqlString % params)
SQL_Query = pd.read_sql_query(command, conn)

df = pd.DataFrame(SQL_Query, columns=['DATE','DROPIN'])

df['DATE'] = pd.to_datetime(df['DATE'], format='%Y%m%d')

print(df.head(10))

#new dataframe
df_diff = df.copy()
df_diff['prev_day'] = df_diff['DROPIN'].shift(1)
df_diff = df_diff.dropna()
df_diff['diff'] = (df_diff['DROPIN'] - df_diff['prev_day'])
df_diff.head(10)

print(df_diff)

#plot monthly sales diff
plot_data = [
    go.Scatter(
        x=df_diff['DATE'],
        y=df_diff['diff'],
    )
]
plot_layout = go.Layout(
        title='Daily Drop In Diff'
    )
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.plot(fig)

#create dataframe for transformation from time series to supervised
df_supervised = df_diff.drop(['prev_day'],axis=1)

#adding lags
for inc in range(1,31):
    field_name = 'lag_' + str(inc)
    df_supervised[field_name] = df_supervised['diff'].shift(inc)
#drop null values
df_supervised = df_supervised.dropna().reset_index(drop=True)

print(df_supervised)

# Import statsmodels.formula.api
import statsmodels.formula.api as smf
# Define the regression formula
model = smf.ols(formula='diff ~ lag_1 + lag_2 + lag_3 + lag_4 + lag_5 + lag_6 + lag_7 + lag_8 + lag_9 + lag_10 + lag_11 + lag_12 + lag_13 + lag_14 + lag_15 + lag_16 + lag_17 + lag_18 + lag_19 + lag_20 + lag_21 + lag_22 + lag_23 + lag_24 + lag_24 + lag_25 + lag_26 + lag_27 + lag_28 + lag_29 + lag_30', data=df_supervised)
# Fit the regression
model_fit = model.fit()
# Extract the adjusted r-squared
regression_adj_rsq = model_fit.rsquared_adj
print(regression_adj_rsq)

#import MinMaxScaler and create a new dataframe for LSTM model
from sklearn.preprocessing import MinMaxScaler
df_model = df_supervised.drop(['DROPIN','DATE'],axis=1)
#split train and test set
train_set, test_set = df_model[0:-28].values, df_model[-28:].values

#apply Min Max Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(train_set)
# reshape training set
train_set = train_set.reshape(train_set.shape[0], train_set.shape[1])
train_set_scaled = scaler.transform(train_set)
# reshape test set
test_set = test_set.reshape(test_set.shape[0], test_set.shape[1])
test_set_scaled = scaler.transform(test_set)

X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=100, batch_size=1, verbose=1, shuffle=False)

y_pred = model.predict(X_test,batch_size=1)
#for multistep prediction, you need to replace X_test values with the predictions coming from t-1

#reshape y_pred
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
#rebuild test set for inverse transform
pred_test_set = []
for index in range(0,len(y_pred)):
    #print np.concatenate([y_pred[index],X_test[index]],axis=1)
    pred_test_set.append(np.concatenate([y_pred[index],X_test[index]],axis=1))
#reshape pred_test_set
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(pred_test_set.shape[0], pred_test_set.shape[2])
#inverse transform
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)

#create dataframe that shows the predicted sales
result_list = []
sales_dates = list(df[-29:].DATE)
act_sales = list(df[-29:].DROPIN)
for index in range(0,len(pred_test_set_inverted)):
    result_dict = {}
    result_dict['pred_value'] = int(pred_test_set_inverted[index][0] + act_sales[index])
    result_dict['DATE'] = sales_dates[index+1]
    result_list.append(result_dict)
df_result = pd.DataFrame(result_list)
#for multistep prediction, replace act_sales with the predicted sales

print(df_result)

#merge with actual sales dataframe
df_sales_pred = pd.merge(df,df_result,on='DATE',how='left')
#plot actual and predicted
plot_data = [
    go.Scatter(
        x=df_sales_pred['DATE'],
        y=df_sales_pred['DROPIN'],
        name='actual'
    ),
        go.Scatter(
        x=df_sales_pred['DATE'],
        y=df_sales_pred['pred_value'],
        name='predicted'
    )

]
plot_layout = go.Layout(
        title='Sales Prediction'
    )
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.plot(fig)

这是我画的第二张图,我想在前面预测。。。你知道吗

enter image description here


Tags: fromtestimportdfdateindexmodelplot