Q-Learning AI 无法识别简单的模式

import requests
import sys
import time

# Constants
learningRate = 0.5
stocksToBuy = 250
discountFactor = 0.5

# Variables declared:

# getStock()
currentStockPrice = 0
pastStockPrice = 0

# reducePricesToBinary()
binaryVersionOfPrices = ""

# Ai()
AI = dict()

# convertBinaryToInputs()
inputsForAI = [0, 0, 0]

# Ai
guess = 0
oldGuess = 0
reward = 0
pastInputsForAI = ['0', 0, 0]
firstTurnOver = False

# Buying and Selling stocks
money = 1000000
shares = 0

# Call counter used by saveEveryFifteen()
countToSaveEveryFifteen = 0


# Saving anything to a file.
def save(name, data):
    """Write str(data) to the file `name`, replacing its previous contents."""
    with open(name, 'w') as f:
        f.write(str(data))


def saveEverything():
    """Save the value table, the price history, the balance and the shares."""
    save("AI", AI)
    save("binaryStockPrices", binaryVersionOfPrices)
    save("money", money)
    save("shares", shares)


# Runs after an error.
def onExit():
    """Save all state, then terminate the interpreter."""
    saveEverything()
    sys.exit()


# Prints and saves an error log if a function crashes.
def crashProgram(errorMessage):
    """Print the message, overwrite the "crashLogs" file with it, then exit."""
    print(errorMessage)
    with open("crashLogs", 'w') as f:
        f.write("{}\n\n".format(errorMessage))
    onExit()


# Runs a function with try catches to catch an error.
def doFunction(function):
    """Call `function()`; on any Exception, log it and shut the program down."""
    try:
        function()
    # `except Exception, e` is Python 2-only syntax; `as e` is valid on
    # Python 2.6+ and on Python 3.
    except Exception as e:
        crashProgram("Fatal error running {}().\n{}".format(function.__name__, e))


# Gets the current stock value.
# def getStock():
#     global currentStockPrice
#     res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
#     stockCostString = ""
#     for x in range (9):
#         stockCostString += res.text[res.text.find('"price": "')+10 + x]
#     currentStockPrice = float(stockCostString)
#     print(currentStockPrice)

def getStock():
    """Advance the stand-in price along the fixed cycle 3 -> 1 -> 2 -> 3.

    Any other current value (including the initial 0) is followed by 3.
    """
    global currentStockPrice
    currentStockPrice = 1 if currentStockPrice == 3 else (2 if currentStockPrice == 1 else 3)


# Turns the prices into 0's and 1's.
def reducePricesToBinary():
    """Append "1" (price rose) or "0" (price fell) to the history string.

    Nothing is appended when the price is unchanged. The current price is then
    remembered as the past price for the next call.
    """
    global pastStockPrice
    global binaryVersionOfPrices
    binaryString = "1" if currentStockPrice > pastStockPrice else "0" if currentStockPrice < pastStockPrice else ""
    binaryVersionOfPrices += binaryString
    pastStockPrice = currentStockPrice


# Converts the binaryStockPrices to inputs for the AI.
def convertBinaryToInputs():
    """Derive the three AI inputs from the tail of the binary price history.

    inputsForAI[0] is the latest character, inputsForAI[1] the length of the
    trailing run of that character, and inputsForAI[2] the length of the run
    of differing characters immediately before it. The list is updated in
    place.
    """
    global inputsForAI
    inputsForAI[0] = binaryVersionOfPrices[len(binaryVersionOfPrices)-1]
    counterOfFirstNumber = 1
    counterOfSecondNumber = 1
    # Both loops overshoot by one step, hence the decrement after each.
    while(binaryVersionOfPrices[len(binaryVersionOfPrices) - counterOfFirstNumber] == inputsForAI[0]):
        counterOfFirstNumber += 1
    counterOfFirstNumber -= 1
    while(binaryVersionOfPrices[len(binaryVersionOfPrices) - counterOfFirstNumber - counterOfSecondNumber] != inputsForAI[0]):
        counterOfSecondNumber += 1
    counterOfSecondNumber -= 1
    inputsForAI[0] = binaryVersionOfPrices[len(binaryVersionOfPrices)-1]
    inputsForAI[1] = counterOfFirstNumber
    inputsForAI[2] = counterOfSecondNumber


# AI functions
def readAI():
    """Look up the stored value for the current inputs and publish it as `guess`.

    A state that has not been seen before is inserted with the value 0.5.
    Returns the guess.
    """
    global guess
    try:
        AIGuess = AI[inputsForAI[0], inputsForAI[1], inputsForAI[2]]
    # Only a missing key means "state not seen yet"; the original bare
    # `except:` would also have hidden unrelated errors.
    except KeyError:
        AI[inputsForAI[0], inputsForAI[1], inputsForAI[2]] = 0.5
        AIGuess = 0.5
    guess = AIGuess
    print("GUESS: {}".format(guess))
    print("INPUTS: {}".format(inputsForAI))
    return guess


def checkGuess():
    """Score the previous guess against the latest move and update its value.

    Skipped on the very first call, when there is no previous guess yet.
    """
    global firstTurnOver
    global oldGuess
    global reward
    global pastInputsForAI
    if(firstTurnOver):
        # Collapse the stored guess to a direction: -1 becomes 0, anything
        # else becomes 1.
        oldGuess = 0 if oldGuess == -1 else 1
        print("Old guess: " + str(oldGuess) + " Input: " + str(int(round(float(inputsForAI[0])))))
        reward = 1 if oldGuess == int(round(float(inputsForAI[0]))) else -1
        # The term that follows discountFactor is hard-coded to 1.
        AI[pastInputsForAI[0], pastInputsForAI[1], pastInputsForAI[2]] = (1-learningRate) * AI[pastInputsForAI[0], pastInputsForAI[1], pastInputsForAI[2]] + learningRate * (reward + discountFactor * 1)
        oldGuess = int(round(float(guess)))
    # NOTE(review): this binds pastInputsForAI to the SAME list object that
    # convertBinaryToInputs() mutates in place, so after the first call the
    # "past" inputs always equal the current ones. Left as posted.
    pastInputsForAI = inputsForAI
    firstTurnOver = True


def buySellStocks():
    """Buy shares when guess > 0, otherwise sell shares.

    NOTE(review): when guess is negative the sell bound
    `(oldStocks - shares) > stocksToBuy * guess` is always true, so every
    share is sold. Left as posted.
    """
    global money
    global shares
    oldStocks = shares
    if(guess > 0):
        while(money > currentStockPrice and (shares - oldStocks) < stocksToBuy * guess):
            money -= currentStockPrice
            shares += 1
    else:
        while(shares > 0 and (oldStocks - shares) > stocksToBuy * guess):
            money += currentStockPrice
            shares -= 1


# Loads the binaryVersionOfPrices from a file.
def loadBinaryPrices():
    """Read the binary price history from the "binaryStockPrices" file."""
    global binaryVersionOfPrices
    with open("binaryStockPrices", 'r') as f:
        binaryVersionOfPrices = f.read()


def loadMoney():
    """Read the cash balance from the "money" file as an int."""
    global money
    with open("money", 'r') as f:
        money = int(f.read())


def loadShares():
    """Read the share count from the "shares" file as an int."""
    global shares
    with open("shares", 'r') as f:
        shares = int(f.read())


# Loads the AI from a file.
def loadAI():
    """Rebuild the AI value table from the "AI" file."""
    global AI
    with open("AI", 'r') as f:
        # NOTE(review): eval() executes whatever the file contains; this is
        # only safe while the file is written exclusively by save().
        AI = eval(f.read())


# Prints relative information
def printStuff():
    """Print the price, balance, shares, total portfolio value and guess."""
    print("Stock price: {}\nCurrent balance: {}\nCurrent shares: {}\nTotal value: {}\nGuess: {}\n".format(currentStockPrice, money, shares, money + shares * currentStockPrice, guess))


# Loads all variables from files.
def onProgramStart():
    """Load every piece of saved state; any failure shuts the program down."""
    doFunction(loadAI)
    doFunction(loadBinaryPrices)
    doFunction(loadMoney)
    doFunction(loadShares)


# Saves every 15 checks
def saveEveryFifteen():
    """Count calls and save all state on every fifteenth one."""
    global countToSaveEveryFifteen
    countToSaveEveryFifteen += 1
    if(countToSaveEveryFifteen == 15):
        saveEverything()
        countToSaveEveryFifteen = 0


# Runs all functions.
def doAllFunctions():
    """Run one full cycle; the next price is fetched last."""
    doFunction(reducePricesToBinary)
    doFunction(convertBinaryToInputs)
    doFunction(readAI)
    doFunction(checkGuess)
    doFunction(buySellStocks)
    doFunction(saveEveryFifteen)
    doFunction(printStuff)
    doFunction(getStock)


# Loads variables from files.
onProgramStart()

# Repeats the process.
while(1):
    doAllFunctions()
    time.sleep(0.5)

2条回答

网友

1楼 · 编辑于 2024-05-13 22:16:52

正如我在评论中提到的，以下是经过一些基本重构后的程序版本：

import sys
import time

# --- tunable constants -------------------------------------------------------
learning_rate: float = 0.5
stocks_to_buy: float = 250
discount_factor: float = 0.5

# --- mutable module state ----------------------------------------------------

# latest price (set by get_stock) and the price seen on the previous cycle
current_stock_price: int = 0
past_stock_price: int = 0

# price history: one '1' per rise and one '0' per fall
binary_version_of_prices: str = str()

# value table, keyed by the (inputs[0], inputs[1], inputs[2]) tuple
a_i: dict = dict()

# current inputs, filled in by convert_binary_to_inputs()
inputs_for_a_i = [0] * 3

# bookkeeping shared by read_ai() and check_guess()
guess = 0
old_guess = 0
reward = 0
past_inputs_for_a_i = ['0', 0, 0]
first_turn_over: bool = False

# simulated portfolio
money: int = 1_000_000
shares: int = 0

# number of cycles since the last save; see save_every_fifteen()
count_to_save_every_fifteen: int = 0


# saving anything to a file.
def save(name, data):
    """Write ``str(data)`` to the file *name*, replacing any previous contents."""
    with open(name, mode='w') as out_file:
        out_file.write(str(data))


def save_everything():
    """Persist the value table, price history, balance and share count.

    The files are written to the same locations the ``load_*`` functions read
    from. Previously they were written to bare file names in the current
    working directory, so saved state was never picked up again on restart.
    """
    import os

    state_dir = "../resources/ai_stock_files"
    # The directory may not exist on a fresh checkout; create it so that the
    # first save does not fail.
    os.makedirs(state_dir, exist_ok=True)
    save(f"{state_dir}/a_i", a_i)
    save(f"{state_dir}/binary_stock_prices", binary_version_of_prices)
    save(f"{state_dir}/money", money)
    save(f"{state_dir}/shares", shares)


# runs after an error.
def on_exit():
    """Flush all state to disk and then terminate the interpreter."""
    save_everything()  # persist first: sys.exit() does not return
    sys.exit()


# gets the current stock value.
# def get_stock():
#    global current_stock_price
#    res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
#    stock_cost_string = ""
#    for x in range (9):
#        stock_cost_string += res.text[res.text.find('"price": "')+10 + x]
#    current_stock_price = float(stock_cost_string)
#    print(current_stock_price)

def get_stock():
    """Advance the stand-in price along the fixed cycle 3 -> 1 -> 2 -> 3.

    Any other current value (including the initial 0) is followed by 3.
    """
    global current_stock_price
    successor = {3: 1, 1: 2}
    current_stock_price = successor.get(current_stock_price, 3)


# turns the prices into 0's and 1's.
def reduce_prices_to_binary():
    """Record the latest price move in the binary history.

    Appends "1" when the price rose since the previous cycle, "0" when it
    fell, and nothing when it is unchanged; the current price then becomes
    the reference for the next call.
    """
    global past_stock_price, binary_version_of_prices
    previous, latest = past_stock_price, current_stock_price
    move = "1" if latest > previous else ("0" if latest < previous else "")
    binary_version_of_prices += move
    past_stock_price = latest


# converts the binary_stock_prices to inputs for the a_i.
def convert_binary_to_inputs():
    """Derive the three inputs from the tail of the binary price history.

    After the call ``inputs_for_a_i`` holds, in order:

    * the latest character of ``binary_version_of_prices``,
    * the length of the trailing run of that character,
    * the length of the run of differing characters immediately before it
      (0 when the history consists of a single run).

    The list is updated in place. With an empty history there is nothing to
    derive, so the inputs are left untouched. The previous implementation
    raised IndexError for an empty history, and also for a history made of one
    single run, because its scan walked past the start of the string through
    negative indices.
    """
    history = binary_version_of_prices
    if not history:
        return

    latest = history[-1]
    before_run = history.rstrip(latest)
    current_run = len(history) - len(before_run)

    # Count the characters directly before the trailing run that differ from
    # the latest one, stopping at the start of the history.
    previous_run = 0
    for symbol in reversed(before_run):
        if symbol == latest:
            break
        previous_run += 1

    # Slice assignment keeps the same list object alive for other holders.
    inputs_for_a_i[:] = [latest, current_run, previous_run]


# a_i functions
def read_ai():
    """Look up the value stored for the current inputs and publish it as ``guess``.

    A state that has not been seen before is inserted into ``a_i`` with the
    value 0.5. ``dict.setdefault`` replaces the earlier bare ``except:``,
    which would also have swallowed errors unrelated to a missing key.

    Returns:
        The value stored for the current inputs.
    """
    global guess
    state = (inputs_for_a_i[0], inputs_for_a_i[1], inputs_for_a_i[2])
    guess = a_i.setdefault(state, 0.5)
    print(f'guess: {guess}')
    print(f'inputs: {inputs_for_a_i}')
    return guess


def check_guess():
    """Score the previous guess against the latest move and update its value.

    On the first call there is no previous guess, so only the bookkeeping at
    the end runs. On later calls the previous guess is collapsed to a
    direction (-1 becomes 0, anything else becomes 1), compared with the
    latest history character, and the value stored for the previous inputs is
    moved towards ``reward + discount_factor * 1`` at ``learning_rate``.
    """
    global first_turn_over, old_guess, reward, past_inputs_for_a_i
    if first_turn_over:
        old_guess = 0 if old_guess == -1 else 1
        observed = round(float(inputs_for_a_i[0]))
        print(f'old guess: {old_guess}, input: {observed}')
        reward = 1 if old_guess == observed else -1
        state = (past_inputs_for_a_i[0], past_inputs_for_a_i[1], past_inputs_for_a_i[2])
        # The term that follows discount_factor is hard-coded to 1.
        a_i[state] = ((1 - learning_rate) * a_i[state]
                      + learning_rate * (reward + discount_factor * 1))
        old_guess = int(round(float(guess)))
    # Take a snapshot: inputs_for_a_i is overwritten in place on the next
    # cycle, so keeping a reference to the same list would make the "past"
    # inputs always identical to the current ones.
    past_inputs_for_a_i = list(inputs_for_a_i)
    first_turn_over = True


def buy_sell_stocks():
    """Trade up to ``stocks_to_buy * abs(guess)`` shares in the guessed direction.

    A positive guess buys one share at a time while the balance exceeds the
    price; any other guess sells one share at a time while shares remain.

    Two defects of the earlier version are fixed:

    * The sell bound compared the number of shares sold with a negative
      number, which is always true, so a negative guess sold the whole
      position. Selling is now capped the same way buying is.
    * No trade happens at a non-positive price. 0 is the module's initial
      price value, and buying at that price handed out shares for nothing.
    """
    global money, shares
    if current_stock_price <= 0:
        return

    limit = stocks_to_buy * abs(guess)
    traded = 0
    if guess > 0:
        while money > current_stock_price and traded < limit:
            money -= current_stock_price
            shares += 1
            traded += 1
    else:
        while shares > 0 and traded < limit:
            money += current_stock_price
            shares -= 1
            traded += 1


# loads the binary_version_of_prices from a file.
def load_binary_prices():
    """Replace the in-memory price history with the contents of its state file."""
    global binary_version_of_prices
    with open("../resources/ai_stock_files/binary_stock_prices", 'r') as history_file:
        stored_history = history_file.read()
        binary_version_of_prices = stored_history


def load_money():
    """Read the cash balance from its state file and parse it as an int."""
    global money
    with open("../resources/ai_stock_files/money") as balance_file:
        stored_balance = balance_file.read()
        money = int(stored_balance)


def load_shares():
    """Read the share count from its state file and parse it as an int."""
    global shares
    with open("../resources/ai_stock_files/shares") as shares_file:
        stored_count = shares_file.read()
        shares = int(stored_count)


# loads the _a_i from a file.
def load_a_i():
    """Rebuild the value table from its state file.

    The file is expected to hold the ``str()`` of a dict literal.
    ``ast.literal_eval`` parses such a literal without executing code, unlike
    the ``eval`` call used before, which would run anything found in the file.

    Raises:
        ValueError or SyntaxError: if the file does not contain a plain
            Python literal.
    """
    import ast

    global a_i
    with open("../resources/ai_stock_files/a_i") as f:
        a_i = ast.literal_eval(f.read())


# prints relative information
def print_stuff():
    """Print the price, balance, share count, total portfolio value and guess."""
    total_value = money + shares * current_stock_price
    report = (f"stock price: {current_stock_price}\n"
              f"current balance: {money}\n"
              f"current shares: {shares}\n"
              f"total value: {total_value}\n"
              f"guess: {guess}\n")
    print(report)


# loads all variables from files.
def on_program_start():
    """Load every piece of saved state, keeping the defaults where none exists.

    Without this tolerance a first run, made before any state file has been
    written, would stop with FileNotFoundError and the program could never
    bootstrap itself. Any other error still propagates.
    """
    for loader in (load_a_i, load_binary_prices, load_money, load_shares):
        try:
            loader()
        except FileNotFoundError:
            # Nothing saved for this item yet: the module-level default stays.
            print(f"{loader.__name__}: no saved state found, using default")


# saves every 15 checks
def save_every_fifteen():
    """Count one more cycle and, on every fifteenth, save all state.

    The counter is reset only after the save has completed.
    """
    global count_to_save_every_fifteen
    count_to_save_every_fifteen += 1
    if count_to_save_every_fifteen != 15:
        return
    save_everything()
    count_to_save_every_fifteen = 0


# runs all functions.
def do_all_functions():
    """Run one full cycle of the trading loop.

    The steps run strictly in the listed order; the price for the next cycle
    is fetched last.
    """
    pipeline = (
        reduce_prices_to_binary,
        convert_binary_to_inputs,
        read_ai,
        check_guess,
        buy_sell_stocks,
        save_every_fifteen,
        print_stuff,
        get_stock,
    )
    for step in pipeline:
        step()


# Restore the saved value table, price history, balance and share count
# before the first cycle runs.
on_program_start()

# Run cycles forever, pausing half a second between them. The loop has no
# exit condition of its own.
while True:
    do_all_functions()
    time.sleep(0.5)

网友

2楼 · 编辑于 2024-05-13 22:16:52

在修正策略梯度中的策略时，我使用的是一个周期之前的输入；而按顺序调用各个函数时，流程本身已经在使用一个周期之前的输入，于是造成了过度补偿，使梯度实际上偏移了两个输入。由于我输入的是周期为 3 的循环数据，这看起来像是一个“差一”（off-by-one）错误，而实际上差了两个，所以很难发现。

相关问题更多 >

编程相关推荐

热门问题

热门文章