CSV |股价操纵问题

2024-06-16 11:11:35 发布

您现在位置:Python中文网/ 问答频道 /正文

各位好!我正在学习这门课程,遇到了一些问题。出问题的是下面这一行:

df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]

您可以在def process_data_for_labels(ticker):函数中找到这一点。谁能告诉我发生了什么事?我准确地复制了他的代码,得到了同样的错误

import bs4 as bs
import requests
import pickle
import datetime as dt
import os
import pandas as pd
import pandas_datareader. data as web
import time
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
from collections import Counter

# Select matplotlib's 'dark_background' style sheet for the plots in this script.
style.use('dark_background')

def save_sp500_tickers():
    """Scrape the S&P 500 constituent table from Wikipedia and cache it.

    The ticker symbols are pickled to ``sp500tickers.pickle`` in the current
    working directory, printed, and returned.

    Returns:
        list[str]: ticker symbols with trailing whitespace stripped.
    """
    page = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    parsed = bs.BeautifulSoup(page.text, 'lxml')
    constituents = parsed.find('table', {'class':'wikitable sortable'})

    # Row 0 is skipped as the header; the first cell of every other row is
    # taken as the ticker symbol.
    body_rows = constituents.findAll('tr')[1:]
    tickers = [tr.findAll('td')[0].text.rstrip() for tr in body_rows]

    with open("sp500tickers.pickle", "wb") as out:
        pickle.dump(tickers, out)

    print(tickers)
    return tickers

#save_sp500_tickers()


def get_data_from_yahoo(reload_sp500=False):
    """Download daily price history for every cached ticker into ``stock_dfs/``.

    Args:
        reload_sp500: when true, refresh the ticker list by calling
            ``save_sp500_tickers()``; otherwise load it from
            ``sp500tickers.pickle``.

    Each ticker is written to ``stock_dfs/<ticker>.csv``. Tickers whose CSV
    already exists are skipped, so the function can be re-run to resume an
    interrupted download.
    """
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    os.makedirs('stock_dfs', exist_ok=True)

    start = dt.datetime(2015, 1, 1)
    end = dt.datetime(2020, 7, 1)

    for ticker in tickers:
        # Files are saved under the dashed form of the symbol, so normalise
        # *before* the existence check; otherwise dotted tickers never match.
        ticker = ticker.replace('.', '-')
        csv_path = f'stock_dfs/{ticker}.csv'

        if os.path.exists(csv_path):
            print(f'Already have {ticker}')
            continue

        # Pause between requests to avoid hammering the data source.
        time.sleep(1)
        print(ticker)
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)

#get_data_from_yahoo()


def compile_data():
    """Join every ticker's adjusted close into one wide CSV.

    Reads the ticker list from ``sp500tickers.pickle`` and each ticker's
    ``stock_dfs/<ticker>.csv``, keeps the 'Adj Close' column renamed to the
    ticker symbol, outer-joins the frames on 'Date', and writes the result to
    ``sp500_joined_closes.csv``. Progress is printed every tenth ticker.
    """
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        # The per-ticker files are stored under the dashed form of the symbol.
        ticker = ticker.replace('.', '-')
        df = pd.read_csv(f'stock_dfs/{ticker}.csv')
        df.set_index('Date', inplace=True)

        df.rename(columns={'Adj Close': ticker}, inplace=True)
        # Use the keyword form: a positional ``axis`` is rejected by pandas 2.x.
        df.drop(columns=['Open', 'High', 'Low', 'Close', 'Volume'], inplace=True)

        if main_df.empty:
            main_df = df
        else:
            # Outer join keeps dates that only some tickers have data for.
            main_df = main_df.join(df, how='outer')

        if count % 10 == 0:
            print(count)

    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')

#compile_data()


def visualize_data():
    """Show a heatmap of the pairwise correlations between ticker closes.

    Reads ``sp500_joined_closes.csv``, prints the head of the correlation
    matrix, and displays it as a red-yellow-green heatmap with the colour
    scale fixed to [-1, 1].
    """
    # Load the first column (the 'Date' index written by compile_data) as the
    # index so that only numeric price columns reach corr(); a string column
    # makes corr() raise on pandas 2.x.
    df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    df_corr = df.corr()
    print(df_corr.head())

    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)
    # The x axis runs over columns (shape[1]) and the y axis over rows
    # (shape[0]); +0.5 centres each tick on its cell.
    ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)

    # Put the first row at the top and the column labels above the plot,
    # so the picture reads like a table.
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    column_labels = df_corr.columns
    row_labels = df_corr.index

    ax.set_xticklabels(column_labels)
    ax.set_yticklabels(row_labels)
    plt.xticks(rotation=90)
    heatmap.set_clim(-1, 1)

    plt.tight_layout()
    plt.show()

#visualize_data()



# Machine Learning
def process_data_for_labels(ticker):
    """Add forward percentage-change columns for *ticker* to the joined closes.

    Loads ``sp500_joined_closes.csv`` (first column as index), replaces
    missing values with 0, and for each day offset 1..7 adds a column
    ``'<ticker>_<offset>d'`` holding ``(price[t + offset] - price[t]) / price[t]``.
    Missing values produced by the shift are filled with 0 as well.

    Args:
        ticker: name of a column in the joined CSV.

    Returns:
        tuple: (list of the CSV's original column names, the augmented frame).

    Raises:
        KeyError: if *ticker* is not a column of the CSV.
    """
    horizon = 7
    closes = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    tickers = closes.columns.values.tolist()
    closes = closes.fillna(0)

    for offset in range(1, horizon + 1):
        today = closes[ticker]
        # shift(-offset) lines up the price `offset` rows ahead with today's row.
        later = today.shift(-offset)
        closes[f'{ticker}_{offset}d'] = (later - today) / today

    return tickers, closes.fillna(0)




def buy_sell_hold(*args):
    """Classify a sequence of future price changes as buy, sell or hold.

    The arguments are scanned in order; the first one whose magnitude
    exceeds the 0.2 threshold decides the result.

    Returns:
        int: 1 if that value is above +0.2, -1 if it is below -0.2,
        0 when no argument crosses the threshold (including no arguments).
    """
    limit = 0.2
    decisive = (
        1 if move > limit else -1
        for move in args
        if move > limit or move < -limit
    )
    # The generator is lazy, so only the first decisive move is evaluated.
    return next(decisive, 0)



def extract_featuresets(ticker):
    """Build the feature matrix and buy/sell/hold labels for *ticker*.

    Args:
        ticker: column name passed through to ``process_data_for_labels``.

    Returns:
        tuple: ``(X, y, df)`` where ``X`` is the array of daily percentage
        changes of every ticker column, ``y`` is the array of labels from
        ``buy_sell_hold`` for *ticker*, and ``df`` is the cleaned frame both
        were taken from.

    Raises:
        KeyError: propagated from ``process_data_for_labels`` when *ticker*
            is not a column of the joined closes.
    """
    tickers, df = process_data_for_labels(ticker)

    # process_data_for_labels names its columns '<ticker>_<i>d' for i in 1..7.
    # Unpacking them lets map() call buy_sell_hold once per row with that
    # row's seven future changes.
    future_changes = [df[f'{ticker}_{i}d'] for i in range(1, 8)]
    df[f'{ticker}_target'] = list(map(buy_sell_hold, *future_changes))

    vals = df[f'{ticker}_target'].values.tolist()
    print('Data spread: ', Counter(str(v) for v in vals))

    # Turn infinities (from division by a zero price) into NaN and drop
    # those rows.
    df.fillna(0, inplace=True)
    df = df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)

    # Features: day-over-day percentage change of every ticker's close.
    df_vals = df[tickers].pct_change()
    df_vals = df_vals.replace([np.inf, -np.inf], 0)
    df_vals.fillna(0, inplace=True)

    X = df_vals.values
    y = df[f'{ticker}_target'].values

    return X, y, df

# NOTE(review): the KeyError in the traceback is raised for this 'APPL'
# argument. It looks like a typo for Apple's ticker 'AAPL' -- confirm that
# 'AAPL' is a column of sp500_joined_closes.csv before changing it.
extract_featuresets('APPL')

错误:

Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
    return self._engine.get_loc(key)
  File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'APPL'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "test.py", line 176, in <module>
    extract_featuresets('APPL')
  File "test.py", line 152, in extract_featuresets
    tickers, df = process_data_for_labels(ticker)
  File "test.py", line 132, in process_data_for_labels
    df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
    indexer = self.columns.get_loc(key)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'APPL'

Tags: csv in import pandas df for data get
1条回答
网友
1楼 · 发布于 2024-06-16 11:11:35

您已经正确地确定了问题开始的位置,但是您需要继续遵循面包屑的指示

错误显示为“KeyError:'APPL'”,其中'APPL'是一个股票代码,是股票价格数据框'df'的列名/键(至少程序预期是这样)。但是在本例中,“df”不包含“APPL”的键/头。在使用“pd.read_csv”读取CSV文件加载数据时,可能出现了问题?或者文件本身缺少数据。

试着打开一个python终端并简单地加载CSV文件,这是您(或程序)所期望的吗

继续挖

相关问题 更多 >