我有一个 Q-Learning 程序，试图预测我模拟的股票市场，其中股票价格按 1-2-3-1-2-3…… 循环变化。
我已经调试了好几天，却始终找不出问题所在。我甚至完全从头重写过，问题依然存在。如果您有空闲时间，希望能帮我再看一眼。
getStock() 函数的作用是模拟股票价格。
reducePricesToBinary() 和 convertBinaryToInputs() 函数的作用是：获取股票价格并将其转换成一组输入 [股票最后一次是上涨还是下跌, 已连续上涨/下跌了多少次, 在此之前连续下跌/上涨了多少次]。
readAI() 函数只是读取在给定输入下应该采取的动作。
checkGuess() 函数的作用是：检查上一次的猜测，并根据猜测是否正确来更改 policyGradient。
非常感谢！
import requests
import sys
import time
# Constants
# Weight given to the newest sample when checkGuess() blends it into the
# stored value (the old value keeps 1 - learningRate).
learningRate = 0.5
# Scaled by the current guess in buySellStocks() to bound how many shares
# are traded in one cycle.
stocksToBuy = 250
# Multiplied by a constant 1 inside the checkGuess() update.
discountFactor = 0.5
# Mutable program state, grouped by the function that maintains it.
# getStock() writes currentStockPrice; reducePricesToBinary() copies it into
# pastStockPrice after comparing the two.
currentStockPrice = 0
pastStockPrice = 0
# reducePricesToBinary(): history of movements, "1" per rise and "0" per fall.
binaryVersionOfPrices = ""
# readAI() / checkGuess(): table mapping an inputs triple to its learned value.
AI = dict()
# convertBinaryToInputs(): [latest movement symbol, length of the trailing
# run of that symbol, number of differing symbols right before that run].
inputsForAI = [0,0,0]
# readAI() stores the value it looked up in guess; checkGuess() keeps the
# remaining names between cycles to score the previous guess.
guess = 0
oldGuess = 0
reward = 0
pastInputsForAI = ['0',0,0]
# Becomes True at the end of the first checkGuess() call.
firstTurnOver = False
# Buying and selling stocks: cash balance and number of shares held.
money = 1000000
shares = 0
# saveEveryFifteen(): cycles counted since the last periodic save.
countToSaveEveryFifteen = 0
# Writes the string form of a value to a file, replacing what was there.
def save(name, data):
    """Overwrite the file ``name`` with ``str(data)``.

    name: path of the file to write (opened in 'w' mode, so it is truncated).
    data: any object; only its ``str()`` representation is stored.
    """
    with open(name, 'w') as outputFile:
        outputFile.write(str(data))
def saveEverything():
    """Write the value table, price history, balance and share count to disk.

    Each value goes through save() into the file named next to it below.
    """
    snapshot = (
        ("AI", AI),
        ("binaryStockPrices", binaryVersionOfPrices),
        ("money", money),
        ("shares", shares),
    )
    for fileName, value in snapshot:
        save(fileName, value)
# Shutdown path used after an error has been reported.
def onExit():
    """Save all state with saveEverything(), then stop the interpreter."""
    saveEverything()
    sys.exit()
# Reports a fatal error on stdout and in the crash log, then shuts down.
def crashProgram(errorMessage):
    """Print ``errorMessage``, store it in "crashLogs" and leave via onExit().

    The log is opened in write mode, so it only holds the most recent crash.
    """
    print(errorMessage)
    with open("crashLogs", 'w') as logFile:
        logEntry = "{}\n\n".format(errorMessage)
        logFile.write(logEntry)
    onExit()
# Runs a function and turns any exception it raises into a logged crash.
def doFunction(function):
    """Call ``function()`` and hand any exception to crashProgram().

    function: a zero-argument callable; its ``__name__`` appears in the
        error message.

    The callable's return value is discarded. Exceptions that do not derive
    from Exception (SystemExit, KeyboardInterrupt) are not intercepted.
    """
    try:
        function()
    # "except X as e" is accepted by Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except Exception as e:
        crashProgram("Fatal error running {}().\n{}".format(function.__name__, e))
# Gets the current stock value.
#def getStock():
# global currentStockPrice
# res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
# stockCostString = ""
# for x in range (9):
# stockCostString += res.text[res.text.find('"price": "')+10 + x]
# currentStockPrice = float(stockCostString)
# print(currentStockPrice)
def getStock():
    """Advance the simulated price one step along the 1 -> 2 -> 3 cycle.

    A price of 3 is followed by 1 and a price of 1 by 2; every other value
    (including the initial 0) is followed by 3.
    """
    global currentStockPrice
    if currentStockPrice == 3:
        nextPrice = 1
    elif currentStockPrice == 1:
        nextPrice = 2
    else:
        nextPrice = 3
    currentStockPrice = nextPrice
# Records the latest price movement as "1" (rise) or "0" (fall).
def reducePricesToBinary():
    """Append the newest movement to binaryVersionOfPrices.

    Compares currentStockPrice with pastStockPrice; an unchanged price adds
    nothing. Afterwards pastStockPrice is set to the current price.
    """
    global pastStockPrice
    global binaryVersionOfPrices
    if currentStockPrice > pastStockPrice:
        movement = "1"
    elif currentStockPrice < pastStockPrice:
        movement = "0"
    else:
        movement = ""
    binaryVersionOfPrices = binaryVersionOfPrices + movement
    pastStockPrice = currentStockPrice
# Converts the binaryStockPrices history into the inputs for the AI.
def convertBinaryToInputs():
    """Summarise the tail of binaryVersionOfPrices into inputsForAI.

    inputsForAI is updated in place to:
        [0] the most recent symbol of the history,
        [1] how many times that symbol repeats at the end of the history,
        [2] how many consecutive differing symbols come right before that run.

    An empty history leaves inputsForAI untouched. A history made of a
    single repeated symbol yields a preceding-run length of 0. The previous
    implementation indexed past the start of the string in both of these
    cases and raised IndexError.
    """
    global inputsForAI
    history = binaryVersionOfPrices
    if not history:
        return
    latest = history[-1]
    # Stripping the trailing run of the latest symbol gives its length
    # without any manual index arithmetic.
    beforeCurrentRun = history.rstrip(latest)
    currentRun = len(history) - len(beforeCurrentRun)
    # Count backwards over symbols that differ from the latest one, stopping
    # at the first match or at the start of the history.
    previousRun = 0
    for symbol in reversed(beforeCurrentRun):
        if symbol == latest:
            break
        previousRun += 1
    inputsForAI[0] = latest
    inputsForAI[1] = currentRun
    inputsForAI[2] = previousRun
# AI functions
def readAI():
    """Look up the stored value for the current inputs and publish it as guess.

    The table key is the triple formed from the three entries of
    inputsForAI. An inputs triple seen for the first time is inserted with a
    value of 0.5. The value is stored in the global ``guess``, printed with
    the inputs, and returned.
    """
    global guess
    key = (inputsForAI[0], inputsForAI[1], inputsForAI[2])
    # setdefault replaces the former bare "except:", which hid every kind of
    # error (not only a missing key) behind the 0.5 default.
    guess = AI.setdefault(key, 0.5)
    print("GUESS: {}".format(guess))
    print("INPUTS: {}".format(inputsForAI))
    return guess
def checkGuess():
    """Score the previous cycle's guess and update the value it came from.

    On every call after the first, the previous guess is compared with the
    newest movement symbol (inputsForAI[0]); the reward is 1 on a match and
    -1 otherwise, and the table entry for the previous cycle's inputs is
    moved towards ``reward + discountFactor`` by ``learningRate``.

    Every call then records the current guess and a snapshot of the current
    inputs for the next call to score.
    """
    global firstTurnOver
    global oldGuess
    global reward
    global pastInputsForAI
    if firstTurnOver:
        # NOTE(review): every stored guess other than -1 is treated as a
        # prediction of "1" here - confirm this encoding is intended.
        oldGuess = 0 if oldGuess == -1 else 1
        latestMovement = int(round(float(inputsForAI[0])))
        print("Old guess: " + str(oldGuess) + " Input: " + str(latestMovement))
        reward = 1 if oldGuess == latestMovement else -1
        pastKey = (pastInputsForAI[0], pastInputsForAI[1], pastInputsForAI[2])
        # NOTE(review): the constant 1 presumably stands in for an estimate
        # of future value - confirm.
        AI[pastKey] = (1-learningRate) * AI[pastKey] + learningRate * (reward + discountFactor * 1)
    oldGuess = int(round(float(guess)))
    # Store a copy: inputsForAI is mutated in place on every cycle, so
    # keeping a reference to it made the "past" inputs always equal the
    # current ones and the update landed on the wrong table entry.
    pastInputsForAI = list(inputsForAI)
    firstTurnOver = True
def buySellStocks():
    """Buy or sell shares at currentStockPrice according to the current guess.

    A positive guess buys one share at a time while the balance exceeds the
    price; any other guess sells one share at a time while shares remain.
    Either way, at most ``stocksToBuy * abs(guess)`` shares change hands per
    call, so a guess of 0 trades nothing.

    The sell branch used to compare the number sold against the negative
    product ``stocksToBuy * guess``; that test was always true, so every
    share was sold regardless of the size of the guess.
    """
    global money
    global shares
    sharesAtStart = shares
    tradeLimit = stocksToBuy * abs(guess)
    if guess > 0:
        while money > currentStockPrice and (shares - sharesAtStart) < tradeLimit:
            money -= currentStockPrice
            shares += 1
    else:
        while shares > 0 and (sharesAtStart - shares) < tradeLimit:
            money += currentStockPrice
            shares -= 1
# Restores the saved price-movement history.
def loadBinaryPrices():
    """Replace binaryVersionOfPrices with the text of "binaryStockPrices"."""
    global binaryVersionOfPrices
    with open("binaryStockPrices", 'r') as historyFile:
        contents = historyFile.read()
    binaryVersionOfPrices = contents
def loadMoney():
    """Read the "money" file and store its contents, parsed as an int, in money."""
    global money
    with open("money", 'r') as moneyFile:
        rawBalance = moneyFile.read()
    money = int(rawBalance)
def loadShares():
    """Read the "shares" file and store its contents, parsed as an int, in shares."""
    global shares
    with open("shares", 'r') as sharesFile:
        rawCount = sharesFile.read()
    shares = int(rawCount)
# Loads the AI from a file.
def loadAI():
    """Replace the AI table with the dictionary literal stored in the "AI" file.

    The file is expected to hold the ``str()`` form of the table, which is
    what save() writes.

    SECURITY: this used to call eval() on the file contents, which executes
    whatever code the file contains. ast.literal_eval only accepts Python
    literals (dicts, tuples, strings, numbers, ...), which covers the saved
    table, and raises ValueError or SyntaxError for anything else.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    import ast

    global AI
    with open("AI", 'r') as tableFile:
        AI = ast.literal_eval(tableFile.read())
# Prints a status report for the current cycle.
def printStuff():
    """Print the price, balance, share count, total value and current guess.

    Total value is the balance plus the shares valued at the current price.
    """
    totalValue = money + shares * currentStockPrice
    report = "Stock price: {}\nCurrent balance: {}\nCurrent shares: {}\nTotal value: {}\nGuess: {}\n".format(
        currentStockPrice, money, shares, totalValue, guess)
    print(report)
# Restores every saved variable from its file.
def onProgramStart():
    """Run each loader through doFunction(), in a fixed order.

    Because doFunction() reports failures through crashProgram(), a loader
    that raises ends the program.
    """
    for loader in (loadAI, loadBinaryPrices, loadMoney, loadShares):
        doFunction(loader)
# Saves all state once every fifteen calls.
def saveEveryFifteen():
    """Count this call and, on the fifteenth, save everything and reset the count."""
    global countToSaveEveryFifteen
    countToSaveEveryFifteen += 1
    if countToSaveEveryFifteen != 15:
        return
    saveEverything()
    countToSaveEveryFifteen = 0
# Runs one full cycle of the program.
def doAllFunctions():
    """Run every per-cycle step through doFunction(), in order.

    The new price is fetched last, so each cycle works on the price fetched
    at the end of the previous one.
    """
    cycleSteps = (
        reducePricesToBinary,
        convertBinaryToInputs,
        readAI,
        checkGuess,
        buySellStocks,
        saveEveryFifteen,
        printStuff,
        getStock,
    )
    for step in cycleSteps:
        doFunction(step)
# Restore the saved state before the first cycle.
onProgramStart()
# Run one cycle every half second, indefinitely.
while True:
    doAllFunctions()
    time.sleep(0.5)
正如我在评论中提到的,以下是经过一些基本重构后的程序版本:
在校正策略梯度中的策略时，我使用的是一个周期之前的输入；而由于函数的调用顺序，我又对此做了过度补偿——它本来就已经在使用一个周期之前的输入了——结果实际上使梯度偏差了两个输入。因为我输入的是以 3 为周期的循环数据，这看起来像是一个“差一”(off-by-one) 错误，而实际上我偏差了两个，所以很难被发现。
相关问题 更多 >
编程相关推荐