KeyError：使用Featu的“轴中不包含标签['Adj Close']”

ticker_input = input('Which stock ticker would you like to predict?') # Start with CLVS for testing print('Getting the historical data for: ',ticker_input) # Downloading historical data as dataframe from datetime import datetime from pandas_datareader import data as web import pandas as pd ex = 'yahoo' start = datetime(2010, 1, 1) end = datetime.now() df = web.DataReader(ticker_input, ex, start, end).reset_index() # Create the prediction dataset df = df.drop(['Close'],axis=1) df['PrevHi'] = df['High'].shift(1) df['PrevLo'] = df['Low'].shift(1) df['PrevClose'] = df['Adj Close'].shift(1) df['PrevVol'] = df['Volume'].shift(1) df['PrevOpen'] = df['Open'].shift(1) df = df.drop(['High','Low','Volume'],axis=1) # Get the 9 and 20 MA values df['9MA'] = df['Open'].rolling(window=9).mean() df['20MA'] = df['Open'].rolling(window=20).mean() import time # Reshape the df df2 = df[['Date','Open','PrevOpen','PrevHi','PrevLo','PrevClose','PrevVol','9MA','20MA','Adj Close']] df2.dropna(how='all') # THIS DROP ISN'T DROPPING ROWS W/ BLANK VALUES FOR SOME REASON??? # Auto Feature Engineering using Feature Tools import featuretools as ft #print(ft.list_primitives().to_string()) # To get full list of primitives that could be used print('Adding the engineered features to the dataframe. This may take a while...') es = ft.EntitySet(id = 'stockdata') es.entity_from_dataframe(entity_id = 'data', dataframe = df2, make_index = False,index = 'Date') # Run deep feature synthesis with transformation primitives feature_matrix, feature_defs = ft.dfs(entityset = es, target_entity = 'data', max_depth=2,verbose=True, agg_primitives = ['skew','mean','median', 'all','count','num_unique','trend','max','mode', 'std','sum','min'], trans_primitives = ['divide_numeric']) # 'diff', # 'greater_than', # 'less_than_equal_to', # 'cum_mean', # 'time_since', # 'cum_sum', # 'add_numeric', # 'multiply_numeric', # 'greater_than_equal_to', # 'negate', # 'cum_min', # 'subtract_numeric', # 'not', # 'cum_count', # 'modulo_numeric', # 'less_than']) print(feature_matrix.head()) df2 = feature_matrix df2.to_csv('FeatureMatrix.csv') # Trying to now name all the feature columns and label for FeatureSelector... features = df2.drop(['Adj Close'],axis=1) label = df2['Adj Close'].values # Now, drop all columns of low importance from feature_selector import FeatureSelector fs = FeatureSelector(data = features, labels = label) fs.identify_all(selection_params = {'missing_threshold': 0.6, 'correlation_threshold': 0.98, 'task': 'regression', 'eval_metric': 'mse', 'cumulative_importance': 0.99}) df2 = fs.remove(methods = 'all') # Somewhere above it's not recognizing my Adj Close label anymore? # Training dataset df = df2.iloc[:-90] # subtracting 90 rows/days to use as the predictions dataset later print('Printing training dataframe...') print(df) # Prediction dataset for later use prediction_df = df2.iloc[-90:] print('Printing prediction dataframe for later use...') print(prediction_df) # Can keep adding to the dataset with things like PrevIndustryHi,Lo,Close,Open and other metrics print('Pausing for 20 seconds to review before training...') time.sleep(20) # Now, train a TPOT Regressor from tpot import TPOTRegressor from sklearn.model_selection import train_test_split import os features = df.drop(['Adj Close'],axis=1) label = df['Adj Close'] X_train, X_test, y_train, y_test = train_test_split(features, label, train_size=0.75, test_size=0.25) # Create a folder to cache the pipeline work (use if not using auto) # if os.path.exists('./PipelineCache'): # pass # else: # os.mkdir('./PipelineCache') tpot = TPOTRegressor(generations=10, population_size=40, verbosity=2) #memory='./PipelineCache', memory='auto', tpot.fit(X_train, y_train) predictions = (tpot.predict(X_test)) actuals = y_test last_row = df.tail(1) print('The last closing price was :') print(last_row['Adj Close']) print("TPOT's final score on training data is : ") print(tpot.score(X_test, y_test)) if os.path.exists('./Exported Pipelines'): pass else: os.mkdir('./Exported Pipelines') tpot.export('./Exported Pipelines/1day-prediction-pipeline.py') # Now, use the TPOT model to predict on the held out predictions dataset from sklearn.metrics import mean_squared_error features = prediction_df.drop(['Adj Close'], axis=1) labels = prediction_df['Adj Close'] # Fit the model to the prediction_df and predict the labels #tpot.fit(features, labels) results = tpot.predict(features) predictions_list = [] for preds in results: predictions_list.append(preds) prediction_df['Predictions'] = predictions_list prediction_df.to_csv('PredictionsPerformance.csv', index=True) print('The Mean Square Error of the predictions is :') print(mean_squared_error(labels,results)) print('DONE!') # Clear the cache directory when you don't need it anymore. # If you're testing the same dataset over and over, use the # same cache file #from shutil import rmtree #rmtree('./PipelineCache')

1条回答

网友

1楼 · 发布于 2024-04-20 03:40:30

作为一种解决方法，我只是在删除过程之后重新添加了df列，其中包含adj close，如下所示：

# Trying to now name all the feature columns and label for FeatureSelector...
features = df.drop("Adj Close", axis=1)
label = df["Adj Close"]
# Now, drop all columns of low importance
from feature_selector import FeatureSelector
fs = FeatureSelector(data = features, labels = label)
fs.identify_all(selection_params = {'missing_threshold': 0.6,    
                                    'correlation_threshold': 0.98, 
                                    'task': 'regression',    
                                    'eval_metric': 'mse', 
                                    'cumulative_importance': 0.99})
all_to_remove = fs.check_removal()
print(all_to_remove[:])
df = fs.remove(methods = 'all')

# Re-Add the Adj Close to the df because FeatureTools removes it once you assign it as the label for some reason
df['Adj Close'] = label

相关问题更多 >

编程相关推荐

热门问题

热门文章