带有阿尔法-贝塔剪枝的点与框游戏

Question

我在写一个点与方块游戏的时候遇到了问题，使用了alpha-beta剪枝算法。整个程序是能运行的，但算法有时候会做出一些愚蠢的决定，我觉得它对游戏状态的评估可能是错的。你能帮我看看吗？我现在完全卡住了。

（我知道这段代码可能有点乱，也没有优化。如果你有建议可以让我改进的地方，请告诉我 :)）

class MinMax:
    '''a class containing Min-Max algorithm
    it uses recursion to evalueate given state (node)'''
    def __init__(self, depth) -> None:
        '''define hiperparameters of the algorithm'''
        self._depth = depth

    @property
    def depth(self):
        return self._depth

    def solve(self, state, alfa=-float('inf'), beta=float('inf'), depth=None):
        '''solver using MinMax algorithm with alpha-beta pruning'''
        if depth is None:
            depth = self.depth
        children = state.generate_children()

        if len(children) == 0 or depth == 0:
            return state.evaluation

        if state.player == 1:  # max turn
            for child in children:
                if state.player == child.player:
                    alfa = max(alfa, self.solve(child, alfa, beta, depth))
                else:
                    alfa = max(alfa, self.solve(child, alfa, beta, depth-1))
                if alfa >= beta:
                    return beta
            return alfa
        else:  # min turn
            for child in children:
                if state.player == child.player:
                    beta = min(beta, self.solve(child, alfa, beta, depth))
                else:
                    beta = min(beta, self.solve(child, alfa, beta, depth-1))
                if alfa >= beta:
                    return alfa
            return beta

from __future__ import annotations
import numpy as np
import copy


class DostsAndBoxesState:
    def __init__(self, player: bool, dimentions: int, state=None, evaluation=0) -> None:
        '''class representing a single Node
        ::player: 0 for Min, 1 for Max
        ::state: state of the game in the node
        ::dimentions: dims of the game board
        '''
        self.player = player
        self.dimentions = dimentions
        self.evaluation = evaluation
        if not state:
            self.state = self.generate_first_state()
        else:
            self.state = state

    def generate_first_state(self):
        state = []
        for i in range(2*self.dimentions - 1):
            if i % 2 == 1:
                state.append(np.full(self.dimentions, -1))
            else:
                state.append(np.full(self.dimentions - 1, -1))
        return state

    def generate_children(self) -> None:
        '''
        children: states one state under current state
        while generating, it makes the evaluation based on evaluation of the parent and potentialy found box
        '''
        childen = []
        for row_index, row in enumerate(self.state):
            for connection_index, connection in enumerate(row):
                if connection == -1:
                    child = copy.deepcopy(self.state)
                    new_row = copy.deepcopy(row)
                    new_row[connection_index] = self.player  # zamieniam -1 na gracza
                    child[row_index] = new_row
                    boxes = self.find_box(row_index, connection_index)
                    if boxes:
                        component = len(boxes) if self.player else -len(boxes)
                        evaluation = self.evaluation + component
                        childen.append(DostsAndBoxesState(self.player, self.dimentions, child, evaluation))
                    else:
                        childen.append(DostsAndBoxesState(not self.player, self.dimentions, child, self.evaluation))
        return childen

    def find_box(self, row_index, column_index) -> np.array[list[np.array]]:
        '''
        looks for the boxes from up, down, left and right
        return array of booleans up_down / left_right
        '''
        boxes = []
        if row_index % 2 == 0:
            up_down = [0 if row_index == 0 else 1,
                       0 if row_index == (2*self.dimentions - 1) - 1 else 1]
            for indx, direction in enumerate(up_down):
                if direction == 1:  # if it's possible to go either up or down
                    row1 = row_index+2*((-1)**(indx+1))
                    row2 = row_index+1*((-1)**(indx+1))
                    if self.state[row1][column_index] != -1 and \
                        self.state[row2][column_index] != -1 and \
                            self.state[row2][column_index+1] != -1:
                        boxes.append(True)
        else:
            left_right = [0 if column_index == 0 else 1,
                          0 if column_index == self.dimentions - 1 else 1]
            for indx, direction in enumerate(left_right):
                if direction == 1:  # if it's possible to go either left or right
                    column = column_index-1 if indx == 0 else column_index
                    if self.state[row_index][column] != -1 and \
                        self.state[row_index-1][column] != -1 and \
                            self.state[row_index+1][column] != -1:
                        boxes.append(True)
        return boxes

    def __str__(self) -> str:
        '''
        prints out the state
        '''
        result = ""
        for index, row in enumerate(self.state):
            if index % 2 == 1:
                symbols = [' ' if connection == -1 else '|' for connection in row]
                line = [symbol+" " for symbol in symbols]
                str_line = "".join(line)
                result = result + str_line + "\n"
            else:
                symbols = [' ' if connection == -1 else '-' for connection in row]
                line = ["*"+symbol for symbol in symbols]
                str_line = "".join(line) + "*"
                result = result + str_line + "\n"
        return result

from __future__ import annotations
from time import sleep
from copy import deepcopy
from random import random
import numpy as np

from State import DostsAndBoxesState
from MinMax import MinMax


class DotsAndBoxes:
    def __init__(self, dimentions, depth, player_mode=1) -> None:
        self.dimentions = dimentions
        self.minmax_evaluator = MinMax(depth)
        self.player_mode = player_mode
        self.first_state = DostsAndBoxesState(1, self.dimentions)

    def bot_make_move(self, state, children):
        '''
        makes a move based on alpha-beta pruning from MinMax class
        '''
        if state.player:
            the_best_move = -float('inf')
            for new_state in children:
                evaluation = self.minmax_evaluator.solve(new_state)
                if evaluation == the_best_move:
                    draw = random()
                    state = new_state if draw < 0.5 else state
                elif evaluation > the_best_move:
                    the_best_move = evaluation
                    state = new_state
        else:
            the_best_move = float('inf')
            for new_state in children:
                evaluation = self.minmax_evaluator.solve(new_state)
                if evaluation == the_best_move:
                    draw = random()
                    state = new_state if draw < 0.5 else state
                elif evaluation < the_best_move:
                    the_best_move = evaluation
                    state = new_state
        return state

    def player_make_move(self, state):
        '''player's move'''
        player_move = [float('inf'), float('inf')]
        while True:
            player_move = input("Make your move: ").split(",")
            try:
                player_move = [int(player_move[0]), int(player_move[1])]
            except Exception:
                player_move = [float('inf'), float('inf')]
                continue
            try:
                if state.state[int(player_move[0])][int(player_move[1])] != -1:
                    player_move = [float('inf'), float('inf')]
                    continue
            except IndexError:
                player_move = [float('inf'), float('inf')]
                continue
            break
        state.state[int(player_move[0])][int(player_move[1])] = self.player_mode
        if not state.find_box(int(player_move[0]), int(player_move[1])):
            state.player = not state.player
        return state

    def check_scored(self, state, previous_state, message, score):
        '''if the box has been just compleated'''
        for index, row in enumerate(state.state):
            difference = np.where(row != previous_state.state[index])[0]
            if len(difference) != 0:
                box = state.find_box(index, difference[0])
                if box:
                    print(message)
                    score[state.player] += len(box)
                    break

    def play(self):
        '''
        the game itself
        '''
        state = self.first_state
        score = [0, 0]
        player_name = "Max" if self.player_mode else "Min"
        print(f"Welcome to the game, you are playing as {player_name}")
        while True:
            if state.player:
                # Max turn
                message = "Max scored!"
                children = state.generate_children()
                if len(children) == 0:
                    break
                print(state)
                print('\n')
                previous_state = deepcopy(state)
                if self.player_mode:
                    state = self.player_make_move(state)
                else:
                    state = self.bot_make_move(state, children)
                self.check_scored(state, previous_state, message, score)

            if not state.player:
                # Min turn
                message = "Min scored!"
                children = state.generate_children()
                if len(children) == 0:
                    winner = "Max"
                    break
                print(state)
                print('\n')
                previous_state = deepcopy(state)
                if not self.player_mode:
                    state = self.player_make_move(state)
                else:
                    state = self.bot_make_move(state, children)
                self.check_scored(state, previous_state, message, score)

        # Who won?
        print(state)
        if score[0] == score[1]:
            print("Tie!")
        else:
            winner = "Max" if score.index(max(score)) == 1 else "Min"
            print(f"The winner is: {winner}")

这个算法在做决定的时候很傻，可能是评估函数出了问题...？

算法优化编程调试游戏开发 alpha-beta剪枝状态评估决策树

带有阿尔法-贝塔剪枝的点与框游戏

0 个回答

撰写回答