我有个问题。我试图训练加密货币数据的神经网络,但当我得到数据时,我得到以下错误:
ValueError: Input contains infinity or a value too large for dtype('float64').
在这条线上:
train_x, train_y = preprocess_df(main_df)
这是我的密码:
def classify(current, future):
if (float(future) - float(current)) / float(current) >= 0.01:
return 1
else:
return 0
def preprocess_df(df):
df = df.drop('future', 1)
for col in df.columns:
if col != "target":
df[col] = df[col].pct_change()
df.dropna(inplace=True)
df[col] = preprocessing.scale(df[col].values)
df.dropna(inplace=True)
sequential_data = []
prev_days = deque(maxlen=SEQ_LEN)
for i in df.values:
prev_days.append([n for n in i[:-1]])
if len(prev_days) == SEQ_LEN:
sequential_data.append([np.array(prev_days), i[-1]])
#random.shuffle(sequential_data)
buys = []
sells = []
for seq, target in sequential_data:
if target == 0:
sells.append([seq, target])
elif target == 1:
buys.append([seq, target])
random.shuffle(buys)
random.shuffle(sells)
lower = min(len(buys), len(sells))
buys = buys[:lower]
sells = sells[:lower]
sequential_data = buys+sells
random.shuffle(sequential_data)
x = []
y = []
for seq, target in sequential_data:
x.append(seq)
y.append(target)
return np.array(x), y
main_df = pd.DataFrame()
ratios = ["BTC-USD"]
for ratio in ratios:
url="www.mysite.org/get_data.php?method=train&ratio=" + str(ratio) +"&period=" + PERIOD_TO_PREDICT
dataset = requests.get(url, verify=False).content
df = pd.read_csv(io.StringIO(dataset.decode('utf-8')), names=["time", "a", "b", "c", "d", "low", "high", "open", "close"])
df.rename(columns={"a": str(ratio)+"_a", "b": str(ratio) + "_b", "c": str(ratio) + "_c", "d": str(ratio) + "_d", "low": str(ratio) + "_low", "high": str(ratio) + "_high", "open": str(ratio) + "_open", "close": str(ratio) + "_close"}, inplace=True)
df.set_index("time", inplace=True)
df = df[[str(ratio) + "_a", str(ratio) + "_b", str(ratio) + "_c", str(ratio) + "_d", str(ratio) + "_low", str(ratio) + "_high", str(ratio) + "_open", str(ratio) + "_close"]]
if len(main_df) == 0:
main_df = df
else:
main_df = main_df.join(df)
main_df['future'] = main_df[str(RATIO_TO_PREDICT) + "_close"].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = list(map(classify, main_df[str(RATIO_TO_PREDICT) + "_close"], main_df["future"]))
#print(main_df[[str(RATIO_TO_PREDICT) + "_close", "future", "target"]].head(10))
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05*len(times))]
validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)
以下是csv数据:
1549192500,8,82,0,18,108.10,108.38,108.38,108.15 /r/l1549193400,12,66,24,10,107.82,108.23,108.13,107.86 /r/l1549194300,13,65,4,31,107.72,108.18,107.86,108.16 /r/l1549195200,10,61,0,39,107.76,108.12,108.12,107.90 /r/l1549196100,12,59,0,41,107.72,108.13,107.89,108.13 /r/l1549197000,6,0,81,19,108.06,108.27,108.10,108.10 /r/l1549197900,4,0,47,53,108.03,108.18,108.11,108.11 /r/l1549198800,8,93,7,0,108.13,108.40,108.13,108.38 /r/l1549199700,7,48,32,20,108.20,108.45,108.37,108.25 /r/l1549200600,15,51,11,38,107.78,108.31,108.25,107.98 /r/l1549201500,10,35,62,3,107.93,108.27,107.94,108.06 /r/l1549202400,8,7,21,72,107.85,108.14,108.06,108.08 /r/l1549203300,4,7,36,57,108.01,108.15,108.10,108.09 /r/l1549204200,9,74,10,16,107.81,108.12,108.09,107.86 /r/l1549205100,14,78,4,18,107.41,107.90,107.88,107.50 /r/l1549206000,15,98,0,2,107.00,107.53,107.53,107.01 /r/l1549206900,29,72,5,23,106.82,107.80,107.04,107.75 /r/l1549207800,12,37,54,9,107.53,107.94,107.72,107.57 /r/l1549208700,10,12,24,64,107.33,107.67,107.59,107.55 /r/l1549209600,17,44,2,54,107.28,107.85,107.59,107.84 /r/l1549210500,7,21,67,12,107.76,108.00,107.84,107.79 /r/l1549211400,6,41,27,32,107.72,107.94,107.79,107.88 /r/l1549212300,4,40,47,13,107.84,107.99,107.86,107.92 /r/l1549213200,5,67,17,16,107.77,107.95,107.92,107.80 /r/l1549214100,3,23,54,23,107.75,107.88,107.81,107.78 /r/l1549215000,5,25,15,60,107.59,107.79,107.76,107.71 /r/l1549215900,9,69,28,3,107.48,107.80,107.71,107.49 /r/l1549216800,4,33,20,47,107.45,107.60,107.52,107.57 /r/l1549217700,4,40,13,47,107.50,107.65,107.57,107.63 /r/l1549218600,6,48,19,33,107.45,107.66,107.62,107.52 /r/l
我做错什么了?你知道吗
这行是因为
ValueError
:pct_change()
函数计算(Current Previous)/Previous。在您的数据中,目标数据之一是0,这会导致它被0除,从而导致无限的百分比变化。当它到达这条线时:它不接受NaN或inf数字。你知道吗
在进行预处理之前,您可以通过移除数据帧中的所有无限值来解决这个问题。你知道吗
相关问题 更多 >
编程相关推荐