Fix chess to white perspective, fix observation bug, add documentation (

#1004)
Farama-Foundation · Jul 6, 2023 · bb119f4 · bb119f4
1 parent 110333f
commit bb119f4
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 22 deletions.
diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py
@@ -18,7 +18,7 @@
 | Agents             | 2                                  |
 | Action Shape       | Discrete(4672)                     |
 | Action Values      | Discrete(4672)                     |
-| Observation Shape  | (8,8,20)                           |
+| Observation Shape  | (8,8,111)                          |
 | Observation Values | [0,1]                              |
 
 
@@ -28,7 +28,7 @@
 
 The observation is a dictionary which contains an `'observation'` element which is the usual RL observation described below, and an  `'action_mask'` which holds the legal moves, described in the Legal Actions Mask section.
 
-Like AlphaZero, the main observation space is an 8x8 image representing the board. It has 20 channels representing:
+Like AlphaZero, the main observation space is an 8x8 image representing the board. It has 111 channels representing:
 
 * Channels 0 - 3: Castling rights:
   * Channel 0: All ones if white can castle queenside
@@ -38,13 +38,17 @@
 * Channel 4: Is black or white
 * Channel 5: A move clock counting up to the 50 move rule. Represented by a single channel where the *n* th element in the flattened channel is set if there has been *n* moves
 * Channel 6: All ones to help neural networks find board edges in padded convolutions
-* Channel 7 - 18: One channel for each piece type and player color combination. For example, there is a specific channel that represents black knights. An index of this channel is set to 1 if a black knight is in the corresponding spot on the game board, otherwise, it is set to 0. En passant
-possibilities are represented by displaying the vulnerable pawn on the 8th row instead of the 5th.
+* Channel 7 - 18: One channel for each piece type and player color combination. For example, there is a specific channel that represents black knights. An index of this channel is set to 1 if a black knight is in the corresponding spot on the game board, otherwise, it is set to 0.
+Similar to LeelaChessZero, en passant possibilities are represented by displaying the vulnerable pawn on the 8th row instead of the 5th.
 * Channel 19: represents whether a position has been seen before (whether a position is a 2-fold repetition)
+* Channels 20 - 110: represent the previous 7 boards, with each board represented by 13 channels. The latest board occupies the first 13 channels, followed by the second latest board, and so on. These 13 channels correspond to channels 7 - 19.
 
-Like AlphaZero, the board is always oriented towards the current agent (the currant agent's king starts on the 1st row). In other words, the two players are looking at mirror images of the board, not the same board.
 
-Unlike AlphaZero, the observation space does not stack the observations previous moves by default. This can be accomplished using the `frame_stacking` argument of our wrapper.
+Similar to AlphaZero, our observation space follows a stacking approach, where it accumulates the previous 8 board observations.
+
+Unlike AlphaZero, where the board orientation may vary, in our system, the `env.board_history` always maintains the orientation towards the white agent, with the white agent's king consistently positioned on the 1st row. In simpler terms, both players are observing the same board layout.
+
+Nevertheless, `env.observe('player_1')` returns the observation from the black agent's orientation: the board is mirrored vertically and the white and black piece channels are swapped. This facilitates the training of agents capable of playing proficiently as both black and white.
 
 #### Legal Actions Mask
 
@@ -202,10 +206,21 @@ def action_space(self, agent):
         return self.action_spaces[agent]
 
     def observe(self, agent):
-        observation = chess_utils.get_observation(
-            self.board, self.possible_agents.index(agent)
-        )
+        current_index = self.possible_agents.index(agent)
+
+        observation = chess_utils.get_observation(self.board, current_index)
         observation = np.dstack((observation[:, :, :7], self.board_history))
+        # We need to swap the white 6 channels with black 6 channels
+        if current_index == 1:
+            # 1. Mirror the board
+            observation = np.flip(observation, axis=0)
+            # 2. Swap the white 6 channels with the black 6 channels
+            for i in range(1, 9):
+                tmp = observation[..., 13 * i - 6 : 13 * i].copy()
+                observation[..., 13 * i - 6 : 13 * i] = observation[
+                    ..., 13 * i : 13 * i + 6
+                ]
+                observation[..., 13 * i : 13 * i + 6] = tmp
         legal_moves = (
             chess_utils.legal_moves(self.board) if agent == self.agent_selection else []
         )
@@ -272,7 +287,8 @@ def step(self, action):
         self._accumulate_rewards()
 
         # Update board after applying action
-        next_board = chess_utils.get_observation(self.board, current_agent)
+        # We always take the perspective of the white agent
+        next_board = chess_utils.get_observation(self.board, player=0)
         self.board_history = np.dstack(
             (next_board[:, :, 7:], self.board_history[:, :, :-13])
         )

diff --git a/pettingzoo/classic/chess/chess_utils.py b/pettingzoo/classic/chess/chess_utils.py
@@ -5,10 +5,12 @@
 def boards_to_ndarray(boards):
     arr64 = np.array(boards, dtype=np.uint64)
     arr8 = arr64.view(dtype=np.uint8)
-    bits = np.unpackbits(arr8)
+    # Unpack each byte least-significant bit first, so that bit indices increase from left to right in the array
+    bits = np.unpackbits(arr8, bitorder="little")
     floats = bits.astype(bool)
     boardstack = floats.reshape([len(boards), 8, 8])
-    boardimage = np.transpose(boardstack, [1, 2, 0])
+    # Flip along the row axis: after the reshape, row 0 holds the 1st rank, but the board image should have the 8th rank as its first row.
+    boardimage = np.flip(np.transpose(boardstack, [1, 2, 0]), axis=0)
     return boardimage
 
 
@@ -135,7 +137,7 @@ def get_move_plane(move):
 actions_to_moves = {}
 
 
-def action_to_move(board, action, player):
+def action_to_move(board: chess.Board, action, player: int):
     base_move = chess.Move.from_uci(actions_to_moves[action])
 
     base_coord = square_to_coord(base_move.from_square)
@@ -164,7 +166,7 @@ def make_move_mapping(uci_move):
     actions_to_moves[cur_action] = uci_move
 
 
-def legal_moves(orig_board):
+def legal_moves(orig_board: chess.Board):
     """Returns legal moves.
 
     action space is a 8x8x73 dimensional array
@@ -194,7 +196,7 @@ def legal_moves(orig_board):
     return legal_moves
 
 
-def get_observation(orig_board, player):
+def get_observation(orig_board: chess.Board, player: int):
     """Returns observation array.
 
     Observation is an 8x8x(P + L) dimensional array.
@@ -281,8 +283,9 @@ def get_observation(orig_board, player):
 
         """
     base = BASE
-    OURS = 0
-    THEIRS = 1
+    # In the module `chess`, the color is represented by 1 for white and 0 for black.
+    OURS = 1
+    THEIRS = 0
     result[base + 0] = board.pieces(chess.PAWN, OURS)
     result[base + 1] = board.pieces(chess.KNIGHT, OURS)
     result[base + 2] = board.pieces(chess.BISHOP, OURS)
@@ -320,18 +323,53 @@ def get_observation(orig_board, player):
         if (history_idx > 0) flip = !flip;
       }
     """
-    # from 0-63
+
+    """
+    The LeelaChessZero-style en passant flag.
+    In FEN, the en passant flag is represented by the square that can be a possible target of an en passant, e.g. the `e3` in `4k3/8/8/8/4Pp2/8/8/4K3 b - e3 99 50`.
+    However, for a neural network, it is not easy to train the network to recognize sparse and unstructured data.
+    Therefore, we adhere to LeelaChessZero's convention: when the en passant flag is set, the vulnerable pawn is drawn on the 1st row if it is white and on the 8th row if it is black.
+    E.g. a white pawn on e2 has just made its initial two-square advance, `e2e4`.
+         A black pawn on f4, next to that white pawn on e4, can capture it en passant.
+         To show this opportunity, we place the white pawn at `e1` instead of `e4` right after its two-square advance.
+         The en passant flag is set for one turn only; it is reset after the next move.
+         Note that the en passant flag does not depend on the opponent's pawns,
+         i.e. the flag is always set after an initial two-square advance.
+
+       The board             The observation of the 7th channel(white pawn)
+    8  · · · · ♚ · · ·    8  · · · · · · · ·
+    7  · · · · · · · ·    7  · · · · · · · ·
+    6  · · · · · · · ·    6  · · · · · · · ·
+    5  · · · · · · · ·    5  · · · · · · · ·
+    4  · · · · ♙ ♟ · ·    4  · · · · · · · ·
+    3  · · · · · · · ·    3  · · · · · · · ·
+    2  · · · · · · · ·    2  · · · · · · · ·
+    1  · · · · ♔ · · ·    1  · · · · 1 · · ·
+       a b c d e f g h       a b c d e f g h
+    FEN: 4k3/8/8/8/4Pp2/8/8/4K3 b - e3 99 50
+
+    More details:
+    https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/classic/chess/chess.py#L42
+    https://github.com/LeelaChessZero/lc0/blob/master/src/chess/board.cc#L1114
+    """
+
+    # Potential en passant target square (the square the advancing pawn passed over), an int ranging from 0 to 63
     square = board.ep_square
     if square:
-        ours = square > 32
+        # A target square below 32 lies in white's half, so a white pawn just advanced; otherwise a black pawn did
+        ours = square < 32
         row = square % 8
-        dest_col_add = 8 * 7 if ours else 0
+        dest_col_add = 0 if ours else 8 * 7
         dest_square = dest_col_add + row
         if ours:
-            result[base + 0].remove(square - 8)
+            # Set the `square + 8` position in channel `base` to False
+            result[base + 0].remove(square + 8)
+            # Set the `dest_square` position in channel `base` to True
             result[base + 0].add(dest_square)
         else:
-            result[base + 6].remove(square + 8)
+            # Set the `square - 8` position in channel `base + 6` to False
+            result[base + 6].remove(square - 8)
+            # Set the `dest_square` position in channel `base + 6` to True
             result[base + 6].add(dest_square)
 
     return boards_to_ndarray(result)