我正在尝试编写一个代码,为上面的表生成一个值。这些值将在遵循一组规则时生成:
当前状态和下一个状态不能相同。因此,如果A是当前状态,那么下一个状态必须是B或C。
每次执行程序时都会随机生成这些值。
所有具有相同当前状态、操作和下一状态的行都应该具有相同的奖励。
问题是,即使我列出了随机选择值的列表,我仍然无法生成不会违反规则1的值。每次我运行代码时,都违反了规则1。程序的代码如下所示。你知道吗
import random
# Creating lists of variables that will be used to pick a next state at
random while standing on a current state
NSA = ['B', 'C'] # List of states reachable from A. We use this list to pick
a next state at random
NSB = ['A', 'C'] # List of states reachable from B. We use this list to pick
a next state at random
NSC = ['A', 'B'] # List of states reachable from B. We use this list to pick
a next state at random
Current = ['A', 'B', 'C'] # List of possible current states
RandomList = [-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0] # List
of possible rewards for actions taken
Action = ["Clockwise", "Counterclockwise"] # List of possible actions taken
from a current state
# We need to make sure we know what the current state is, what the action
will be and what would be the next state and have a reward for it
# Since there are 2 possible states reachable from A after a clockwise
action, we create and declare 2 variables. These variables will be used to
pick the next
# state at random. Having 2 variables ensures that all the entries are not
populated by a single state and thus makins sure we have both states
populating the column for the action. We then do this for counterclockwise
action and repeat it for everystate
# Since python doesnt allow to declare variables without instantiation,
hence we are assigning a value to each state. This wont matter as we will
later be using the random function to choose a value from the respected
lists declared above.
# Variables as mentioned above for state A. Note cw = clockwise and ccw =
counterclockwise
AnScw1 = "B"
AnScw2 = "C"
AnSccw1 = "B"
AnSccw2 = "C"
# Variables as mentioned above for state B. Note cw = clockwise and ccw =
counterclockwise
BnScw1 = "A"
BnScw2 = "C"
BnSccw1 = "A"
BnSccw2 = "C"
# Variables as mentioned above for state C. Note cw = clockwise and ccw =
counterclockwise
AnScw1 = "A"
AnScw2 = "B"
AnSccw1 = "A"
AnSccw2 = "B"
# creating variables to pick rewards randomly
a_cw_r1 = random.choice(RandomList)
a_cw_r2 = random.choice(RandomList)
a_ccw_r1 = random.choice(RandomList)
a_ccw_r2 = random.choice(RandomList)
b_cw_r1 = random.choice(RandomList)
b_cw_r2 = random.choice(RandomList)
b_ccw_r1 = random.choice(RandomList)
b_ccw_r2 = random.choice(RandomList)
c_cw_r1 = random.choice(RandomList)
c_cw_r2 = random.choice(RandomList)
c_ccw_r1 = random.choice(RandomList)
c_ccw_r2 = random.choice(RandomList)
current = random.choice(Current) # Pick at randomly a current state from the list Current
if (current=="A") :
action = random.choice(Action) # pick a action at random, from the list of actions
if (action=="Clockwise") : # if clockwise action picked
AnScw1 = random.choice(NSA) # randomly choose a next state
AnScw2 = random.choice(NSA) # randomly choose a next state
# if the two states are the same, then change the values. So if any one of two has B as next state, the other is
# is assigned C as the next state.
if (AnScw1 == "B" or AnScw2 == "B") :
if (AnScw1 == AnScw2 and AnScw1 == "B") :
AnScw2 = "C"
a_cw_b_r1 = random.choice(RandomList) # pick a reward from list at random
a_cw_b_r2 = random.choice(RandomList) # pick a reward from list at random
if (AnScw1 == "C" or AnScw2 == "C") :
if (AnScw1 == AnScw2 and AnScw1 == "C") :
AnScw2 = "B"
a_cw_r1 = random.choice(RandomList)
a_cw_r2 = random.choice(RandomList)
if (action=="Counterclockwise") :
AnSccw1 = random.choice(NSA)
AnSccw2 = random.choice(NSA)
if (AnSccw1 == "B" or AnSccw2 == "B") :
if (AnSccw1 == AnSccw2 and AnSccw1 == "B") :
AnSccw2 = "C"
a_ccw_r1 = random.choice(RandomList)
a_ccw_r2 = random.choice(RandomList)
if (AnSccw1 == "C" or AnSccw2 == "C") :
if (AnSccw1 == AnSccw2 and AnSccw1 == "C") :
AnSccw2 = "B"
a_ccw_r1 = random.choice(RandomList)
a_ccw_r2 = random.choice(RandomList)
if (current=="B") :
action = random.choice(Action)
if (action=="Clockwise") :
BnScw1 = random.choice(NSB)
BnScw2 = random.choice(NSB)
if (BnScw1 == "A" or BnScw2 == "A") :
if (BnScw1 == BnScw2 and BnScw1 == "A") :
BnScw2 = "C"
b_cw_b_r1 = random.choice(RandomList)
b_cw_b_r2 = random.choice(RandomList)
if (BnScw1 == "C" or BnScw2 == "C") :
if (BnScw1 == BnScw2 and BnScw1 == "C") :
BnScw2 = "B"
b_cw_r1 = random.choice(RandomList)
b_cw_r2 = random.choice(RandomList)
if (action=="Counterlockwise") :
BnSccw1 = random.choice(NSB)
BnSccw2 = random.choice(NSB)
if (BnSccw1 == "A" or BnSccw2 == "A") :
if (BnSccw1 == BnSccw2 and BnSccw1 == "A") :
BnSccw2 = "C"
b_ccw_r1 = random.choice(RandomList)
b_ccw_r2 = random.choice(RandomList)
if (BnSccw1 == "C" or BnSccw2 == "C") :
if (BnSccw1 == BnSccw2 and BnSccw1 == "C") :
BnSccw2 = "B"
b_ccw_r1 = random.choice(RandomList)
b_ccw_r2 = random.choice(RandomList)
if (current=="C") :
action = random.choice(Action)
if (action=="Clockwise") :
CnScw1 = random.choice(NSC)
CnScw2 = random.choice(NSC)
if (CnScw1 == "A" or CnScw2 == "A") :
if (CnScw1 == CnScw2 and CnScw1 == "A") :
CnScw2 = "B"
c_cw_r1 = random.choice(RandomList)
c_cw_r2 = random.choice(RandomList)
if (CnScw1 == "B" or CnScw2 == "B") :
if (CnScw1 == CnScw2 and CnScw1 == "B") :
CnScw2 = "A"
c_cw_r1 = random.choice(RandomList)
c_cw_r2 = random.choice(RandomList)
if (action=="Counterclockwise") :
CnSccw1 = random.choice(NSC)
CnSccw2 = random.choice(NSC)
if (CnSccw1 == "A" or CnSccw2 == "A") :
if (CnSccw1 == CnSccw2 and CnSccw1 == "A") :
CnSccw2 = "B"
c_ccw_r1 = random.choice(RandomList)
c_ccw_r2 = random.choice(RandomList)
if (CnSccw1 == "B" or CnSccw2 == "B") :
if (CnSccw1 == CnSccw2 and CnSccw1 == "B") :
CnSccw2 = "A"
c_ccw_r1 = random.choice(RandomList)
c_ccw_r2 = random.choice(RandomList)
如你所见,我已经创建了3个名单国安局,国安局和国安局。我使用这3个列表从中随机选取一个值,但我得到的项目甚至不在列表中。例如,看看下面的代码部分
NSA = ['B', 'C']
AnScw1 = "B"
AnScw2 = "C"
AnSccw1 = "B"
AnSccw2 = "C"
#calling then later like this
AnScw1 = random.choice(NSA)
AnScw2 = random.choice(NSA)
AnSccw1 = random.choice(NSA)
AnSccw2 = random.choice(NSA)
我应该从AnScw1,AnScw2,AnSccw1,AnSccw2得到B或C,但是相反,我也得到了一个不在列表中的项目。你知道吗
所需的输出应如下所示:
===========================================================================
|| Current State || || Action Taken || || Next State || Reward ||
===========================================================================
|| A || || Clockwise || || C || -2.0 ||
|| C || || Counterclockwise || || A || 4.0 ||
|| B || || Counterclockwise || || C || 4.0 ||
|| A || || Counterclockwise || || C || 4.0 ||
我认为,为了确保规则nr1永远不会被验证,可以使用
assert
语句,如下所示:相关问题 更多 >
编程相关推荐