带有阿尔法-贝塔剪枝的点与框游戏
我在写一个点与方块游戏的时候遇到了问题,使用了alpha-beta剪枝算法。整个程序是能运行的,但算法有时候会做出一些愚蠢的决定,我觉得它对游戏状态的评估可能是错的。你能帮我看看吗?我现在完全卡住了。
(我知道这段代码可能有点乱,也没有优化。如果你有建议可以让我改进的地方,请告诉我 :))
class MinMax:
'''a class containing Min-Max algorithm
it uses recursion to evalueate given state (node)'''
def __init__(self, depth) -> None:
'''define hiperparameters of the algorithm'''
self._depth = depth
@property
def depth(self):
return self._depth
def solve(self, state, alfa=-float('inf'), beta=float('inf'), depth=None):
'''solver using MinMax algorithm with alpha-beta pruning'''
if depth is None:
depth = self.depth
children = state.generate_children()
if len(children) == 0 or depth == 0:
return state.evaluation
if state.player == 1: # max turn
for child in children:
if state.player == child.player:
alfa = max(alfa, self.solve(child, alfa, beta, depth))
else:
alfa = max(alfa, self.solve(child, alfa, beta, depth-1))
if alfa >= beta:
return beta
return alfa
else: # min turn
for child in children:
if state.player == child.player:
beta = min(beta, self.solve(child, alfa, beta, depth))
else:
beta = min(beta, self.solve(child, alfa, beta, depth-1))
if alfa >= beta:
return alfa
return beta
from __future__ import annotations
import numpy as np
import copy
class DostsAndBoxesState:
def __init__(self, player: bool, dimentions: int, state=None, evaluation=0) -> None:
'''class representing a single Node
::player: 0 for Min, 1 for Max
::state: state of the game in the node
::dimentions: dims of the game board
'''
self.player = player
self.dimentions = dimentions
self.evaluation = evaluation
if not state:
self.state = self.generate_first_state()
else:
self.state = state
def generate_first_state(self):
state = []
for i in range(2*self.dimentions - 1):
if i % 2 == 1:
state.append(np.full(self.dimentions, -1))
else:
state.append(np.full(self.dimentions - 1, -1))
return state
def generate_children(self) -> None:
'''
children: states one state under current state
while generating, it makes the evaluation based on evaluation of the parent and potentialy found box
'''
childen = []
for row_index, row in enumerate(self.state):
for connection_index, connection in enumerate(row):
if connection == -1:
child = copy.deepcopy(self.state)
new_row = copy.deepcopy(row)
new_row[connection_index] = self.player # zamieniam -1 na gracza
child[row_index] = new_row
boxes = self.find_box(row_index, connection_index)
if boxes:
component = len(boxes) if self.player else -len(boxes)
evaluation = self.evaluation + component
childen.append(DostsAndBoxesState(self.player, self.dimentions, child, evaluation))
else:
childen.append(DostsAndBoxesState(not self.player, self.dimentions, child, self.evaluation))
return childen
def find_box(self, row_index, column_index) -> np.array[list[np.array]]:
'''
looks for the boxes from up, down, left and right
return array of booleans up_down / left_right
'''
boxes = []
if row_index % 2 == 0:
up_down = [0 if row_index == 0 else 1,
0 if row_index == (2*self.dimentions - 1) - 1 else 1]
for indx, direction in enumerate(up_down):
if direction == 1: # if it's possible to go either up or down
row1 = row_index+2*((-1)**(indx+1))
row2 = row_index+1*((-1)**(indx+1))
if self.state[row1][column_index] != -1 and \
self.state[row2][column_index] != -1 and \
self.state[row2][column_index+1] != -1:
boxes.append(True)
else:
left_right = [0 if column_index == 0 else 1,
0 if column_index == self.dimentions - 1 else 1]
for indx, direction in enumerate(left_right):
if direction == 1: # if it's possible to go either left or right
column = column_index-1 if indx == 0 else column_index
if self.state[row_index][column] != -1 and \
self.state[row_index-1][column] != -1 and \
self.state[row_index+1][column] != -1:
boxes.append(True)
return boxes
def __str__(self) -> str:
'''
prints out the state
'''
result = ""
for index, row in enumerate(self.state):
if index % 2 == 1:
symbols = [' ' if connection == -1 else '|' for connection in row]
line = [symbol+" " for symbol in symbols]
str_line = "".join(line)
result = result + str_line + "\n"
else:
symbols = [' ' if connection == -1 else '-' for connection in row]
line = ["*"+symbol for symbol in symbols]
str_line = "".join(line) + "*"
result = result + str_line + "\n"
return result
from __future__ import annotations
from time import sleep
from copy import deepcopy
from random import random
import numpy as np
from State import DostsAndBoxesState
from MinMax import MinMax
class DotsAndBoxes:
def __init__(self, dimentions, depth, player_mode=1) -> None:
self.dimentions = dimentions
self.minmax_evaluator = MinMax(depth)
self.player_mode = player_mode
self.first_state = DostsAndBoxesState(1, self.dimentions)
def bot_make_move(self, state, children):
'''
makes a move based on alpha-beta pruning from MinMax class
'''
if state.player:
the_best_move = -float('inf')
for new_state in children:
evaluation = self.minmax_evaluator.solve(new_state)
if evaluation == the_best_move:
draw = random()
state = new_state if draw < 0.5 else state
elif evaluation > the_best_move:
the_best_move = evaluation
state = new_state
else:
the_best_move = float('inf')
for new_state in children:
evaluation = self.minmax_evaluator.solve(new_state)
if evaluation == the_best_move:
draw = random()
state = new_state if draw < 0.5 else state
elif evaluation < the_best_move:
the_best_move = evaluation
state = new_state
return state
def player_make_move(self, state):
'''player's move'''
player_move = [float('inf'), float('inf')]
while True:
player_move = input("Make your move: ").split(",")
try:
player_move = [int(player_move[0]), int(player_move[1])]
except Exception:
player_move = [float('inf'), float('inf')]
continue
try:
if state.state[int(player_move[0])][int(player_move[1])] != -1:
player_move = [float('inf'), float('inf')]
continue
except IndexError:
player_move = [float('inf'), float('inf')]
continue
break
state.state[int(player_move[0])][int(player_move[1])] = self.player_mode
if not state.find_box(int(player_move[0]), int(player_move[1])):
state.player = not state.player
return state
def check_scored(self, state, previous_state, message, score):
'''if the box has been just compleated'''
for index, row in enumerate(state.state):
difference = np.where(row != previous_state.state[index])[0]
if len(difference) != 0:
box = state.find_box(index, difference[0])
if box:
print(message)
score[state.player] += len(box)
break
def play(self):
'''
the game itself
'''
state = self.first_state
score = [0, 0]
player_name = "Max" if self.player_mode else "Min"
print(f"Welcome to the game, you are playing as {player_name}")
while True:
if state.player:
# Max turn
message = "Max scored!"
children = state.generate_children()
if len(children) == 0:
break
print(state)
print('\n')
previous_state = deepcopy(state)
if self.player_mode:
state = self.player_make_move(state)
else:
state = self.bot_make_move(state, children)
self.check_scored(state, previous_state, message, score)
if not state.player:
# Min turn
message = "Min scored!"
children = state.generate_children()
if len(children) == 0:
winner = "Max"
break
print(state)
print('\n')
previous_state = deepcopy(state)
if not self.player_mode:
state = self.player_make_move(state)
else:
state = self.bot_make_move(state, children)
self.check_scored(state, previous_state, message, score)
# Who won?
print(state)
if score[0] == score[1]:
print("Tie!")
else:
winner = "Max" if score.index(max(score)) == 1 else "Min"
print(f"The winner is: {winner}")
这个算法在做决定的时候很傻,可能是评估函数出了问题...?
0 个回答
暂无回答