在Python中用多维键对存储值

num_episodes = 500

# this is the table that will hold our summated rewards for
# each action in each state
r_table = np.zeros((10000, 10000))
for g in range(num_episodes):
    # np.int was only an alias for the builtin int and has been removed from
    # NumPy; the builtin gives the same dtype.
    s = np.array(state.sample(), dtype=int)
    done = False
    count = 0
    while not done:
        # NOTE(review): if s holds several integers, r_table[s, :] selects
        # several rows at once rather than one state's row -- confirm the
        # shape of state.sample().
        if np.sum(r_table[s, :]) == 0:
            # No reward recorded for this state yet: choose the action with a
            # coarse grid search (brute) refined by a bounded minimize.
            # NOTE(review): the EUR_* prices are presumably read as globals
            # by reward() -- confirm.
            EUR_elec_sell = 0.050
            EUR_elec_buy = 0.100
            EUR_gas = 0.030
            rranges = ((0, 1250), (0, 2000), (0, 3000))
            res0 = brute(reward, rranges, finish=None)
            res1 = minimize(reward, res0, bounds=[(0, 1250), (0, 2000), (0, 3000)])
            a = res1.x
            # Round each component to the nearest ten and convert to int.
            a = list(map(int, a.round(decimals=-1)))
        else:
            # select the action with highest cumulative reward
            a = np.argmax(r_table[s, :])
        # Predict the next state from the current state and chosen action.
        s_t1 = model.predict([np.append(s, a)]).astype(int)
        new_s = np.append(s_t1, np.delete(s, 1))
        r = reward(a)
        count += 1
        # Each episode is capped at 1000 steps.
        if count == 1000:
            done = True
        # NOTE(review): a is a list of ints in the first branch, so this is
        # NumPy integer-array indexing, not a single (state, action) cell.
        r_table[s, a] += r
        s = new_s

1条回答

网友

1楼 · 发布于 2024-06-06 11:03:42

您可以使用像tuple(s[0]) + tuple(a)这样的元组作为键，但这还不够，因为您还需要查询给定s向量对应的所有值。更好的做法是让r_table成为嵌套的dict（dict的dict），其中tuple(s[0])是第一层的键，tuple(a)是第二层的键：

num_episodes = 500

# Table of summed rewards, stored as a dict of dicts:
#     r_table[state_key][action_key] -> cumulative reward
# Tuples are used as keys so that vector-valued states and actions are
# hashable, and all actions recorded for one state can be queried together.
r_table = {}
for g in range(num_episodes):
    # np.int was only an alias for the builtin int and has been removed from
    # NumPy; the builtin gives the same dtype.
    s = np.array(state.sample(), dtype=int)
    count = 0
    done = False
    while not done:
        # NOTE(review): tuple(s[0]) requires s[0] to be iterable, i.e. s must
        # be at least 2-D. new_s below is built with np.append, which
        # flattens its inputs -- confirm s keeps the expected shape after the
        # first step.
        s_key = tuple(s[0])
        action_rewards = r_table.setdefault(s_key, {})
        if sum(action_rewards.values()) == 0:
            # No reward recorded for this state yet (or the recorded rewards
            # sum to zero): choose the action with a coarse grid search
            # (brute) refined by a bounded minimize.
            # NOTE(review): the EUR_* prices are presumably read as globals
            # by reward() -- confirm.
            EUR_elec_sell = 0.050
            EUR_elec_buy = 0.100
            EUR_gas = 0.030
            rranges = ((0, 1250), (0, 2000), (0, 3000))
            res0 = brute(reward, rranges, finish=None)
            res1 = minimize(reward, res0, bounds=list(rranges))
            # Round each component to the nearest ten and convert to int.
            a = [int(component) for component in res1.x.round(decimals=-1)]
        else:
            # Select the action with the highest cumulative reward. The key
            # was previously negated, which made max() return the action with
            # the LOWEST cumulative reward.
            # NOTE(review): brute/minimize above minimise reward(); confirm
            # the sign convention of reward() matches "highest is best".
            a = max(action_rewards, key=action_rewards.get)
        # Predict the next state from the current state and chosen action.
        s_t1 = model.predict([np.append(s, a)]).astype(int)
        new_s = np.append(s_t1, np.delete(s, 1))
        r = reward(a)
        count += 1
        # Each episode is capped at 1000 steps.
        done = count == 1000
        a_key = tuple(a)
        action_rewards[a_key] = action_rewards.get(a_key, 0) + r
        s = new_s

相关问题更多 >

编程相关推荐

热门问题

热门文章