# heuristic.py
import copy
import random

import numpy as np
from scipy import spatial

import utils
from spy import Spy

class Heuristic:
    def __init__(self, board):
        self.board = board
        # snapshot of the piece quantities at construction time
        self.starting_quantities = Spy(self.board)
        # candidate boards to search over; filled in by the caller
        self.population = []

    def evaluate(self, board=None):
        """Score a board by how far its quantities deviate from the starting ones."""
        current_board = self.board if board is None else board
        score = 0
        for key in self.starting_quantities.keys():
            score += abs(current_board[key] - self.starting_quantities[key])
        return score

    def get_best_board(self):
        """Return the highest-scoring board in the population, with its score."""
        maximum = -1
        best_board = None
        for board in self.population:
            score = self.evaluate(board)
            if score > maximum:
                maximum = score
                best_board = board
        return best_board, maximum

    def random_swap_list(self, list1, list2):
        """Return the two lists in their given or swapped order, with equal probability."""
        if random.random() <= 0.5:
            return list1, list2
        return list2, list1

    def compare(self, board1, board2):
        """Three-way comparison of two boards by their heuristic score."""
        score1, score2 = self.evaluate(board1), self.evaluate(board2)
        if score1 < score2:
            return -1
        if score1 > score2:
            return 1
        return 0
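
# A minimal usage sketch for Heuristic (hedged: it assumes Spy(board) behaves
# like a mapping from piece keys to their starting counts and that boards are
# indexable under the same keys; `board_a`/`board_b` are hypothetical):
#
#   h = Heuristic(board)
#   h.population = [board_a, board_b]   # candidate boards from the caller
#   best_board, best_score = h.get_best_board()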

class MinMax:
    # NOTE: this class relies on attributes and helpers (self.team,
    # self.other_team, self.ordered_opp_pieces, self.do_move, and the move
    # history stacks used by undo_last_move) that are provided by an agent
    # base class defined elsewhere in the project, which the
    # super().__init__ call below delegates to.
    def __init__(self, team, depth=None):
        super(MinMax, self).__init__(team=team)
        # rewards for planning the move
        self.kill_reward = 10  # killing an enemy piece
        self.neutral_fight = 2  # a neutral outcome of a fight
        self.winGameReward = 100  # finding the enemy flag
        self.certainty_multiplier = 1.2  # killing known, not guessed, enemy pieces
        # initial maximum depth of the minimax algorithm
        self.ext_depth = depth
        self.max_depth = 2  # standard max depth
        # the matrix table for deciding battle outcomes between two pieces
        self.battleMatrix = utils.get_bm()
    def decide_move(self, state, logic):
        """
        Depending on the number of enemy pieces left, we are entering the start, mid or endgame
        and plan the move through the minimax algorithm.
        :return: tuple of position tuples representing the move
        """
        if self.ext_depth is None:
            self.set_max_depth()  # set max_depth each turn
        else:
            self.max_depth = self.ext_depth
        # make sure a flag win is discounted by a factor that guarantees a
        # preference for an immediate flag capture
        self.winGameReward = max(self.winGameReward, self.max_depth * self.kill_reward)
        return self.minmax(max_depth=self.max_depth)
    def set_max_depth(self):
        n_alive_enemies = sum(1 for piece in self.ordered_opp_pieces if not piece.dead)
        if 7 < n_alive_enemies <= 10:
            # one move of each player lookahead
            self.max_depth = 2
        elif 4 <= n_alive_enemies <= 7:
            # two moves of each player lookahead
            self.max_depth = 4
        elif n_alive_enemies <= 3:
            # four moves of each player lookahead
            self.max_depth = 8
    def minmax(self, max_depth):
        """
        Given the maximum depth, copy the known board so far, assign the hidden enemy pieces
        at random while still respecting the current knowledge, and then decide the move via
        the minimax algorithm.
        :param max_depth: int
        :return: tuple of position tuples
        """
        curr_board = copy.deepcopy(self.board)
        curr_board = self.draw_consistent_enemy_setup(curr_board)
        chosen_action = self.max_val(
            curr_board, 0, -float("inf"), float("inf"), max_depth
        )[1]
        return chosen_action
    def max_val(self, board, current_reward, alpha, beta, depth):
        """
        Do the max player's step in the minimax algorithm. Check first whether the given board
        is in a terminal state. If not, we do each possible move once and send the process to
        min_val to do the min player's step.
        :param board: the current board, numpy array
        :param current_reward: the current value the path has accumulated
        :param alpha: alpha threshold of the minimax alg
        :param beta: beta threshold of the minimax alg
        :param depth: the depth the process is at, integer
        :return: tuple of best value and an associated best_action (float, tuple)
        """
        # this is what the minimax agent, the max player, will think
        # get my possible actions, then shuffle them to ensure randomness when no action
        # stands out as the best
        my_doable_actions = utils.get_poss_moves(board, self.team)
        np.random.shuffle(my_doable_actions)
        # check for terminal-state scenario
        done, won = self.goal_test(my_doable_actions, board, max_val=True)
        if done or depth == 0:
            return current_reward + self.get_terminal_reward(done, won, depth), None
        val = -float("inf")
        best_action = None
        for action in my_doable_actions:
            board, fight_result = self.do_move(
                action, board=board, bookkeeping=False, true_gameplay=False
            )
            temp_reward = current_reward + self.add_temp_reward(fight_result)
            new_val = self.min_val(board, temp_reward, alpha, beta, depth - 1)[0]
            if val < new_val:
                val = new_val
                best_action = action
            if val >= beta:  # beta cutoff: the min player above will avoid this branch
                self.undo_last_move(board)
                best_action = action
                return val, best_action
            alpha = max(alpha, val)
            board = self.undo_last_move(board)
        return val, best_action
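    # Pruning note: in max_val, once val >= beta the min player one level up
    # would never let the game reach this branch, so the remaining actions need
    # not be expanded; min_val below mirrors this with its val <= alpha test.
    # This is standard alpha-beta pruning and does not change the minimax value
    # of the root.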
    def min_val(self, board, current_reward, alpha, beta, depth):
        """
        Step of the minimizing player in the minimax algorithm. See max_val for documentation.
        """
        # this is what the opponent, the min player, will think
        # get their possible actions, then shuffle them to ensure randomness when no action
        # stands out as the best
        my_doable_actions = utils.get_poss_moves(board, self.other_team)
        np.random.shuffle(my_doable_actions)
        # check for terminal-state scenario or maximum depth
        done, won = self.goal_test(my_doable_actions, board, max_val=False)
        if done or depth == 0:
            return current_reward + self.get_terminal_reward(done, won, depth), None
        val = float("inf")  # initial value set high, so the min comparison below is possible
        best_action = None
        # iterate through all actions
        for action in my_doable_actions:
            board, fight_result = self.do_move(
                action, board=board, bookkeeping=False, true_gameplay=False
            )
            temp_reward = current_reward - self.add_temp_reward(fight_result)
            new_val = self.max_val(board, temp_reward, alpha, beta, depth - 1)[0]
            if val > new_val:
                val = new_val
                best_action = action
            if val <= alpha:  # alpha cutoff: the max player above will avoid this branch
                self.undo_last_move(board)
                return val, best_action
            beta = min(beta, val)
            board = self.undo_last_move(board)
        return val, best_action
    def add_temp_reward(self, fight_result):
        """
        Reward the fight given its outcome.
        :param fight_result: integer category of the fight outcome
        :return: reward, float
        """
        # depending on the fight we want to update the current path's value
        temp_reward = 0
        if fight_result is not None:
            if fight_result == 1:  # attacker won
                temp_reward = self.kill_reward
            elif fight_result == 2:  # attacker won, every piece was known before
                temp_reward = int(self.certainty_multiplier * self.kill_reward)
            elif fight_result == 0:  # neutral outcome
                temp_reward = self.neutral_fight  # both pieces die
            elif fight_result == -1:  # attacker lost
                temp_reward = -self.kill_reward
            elif fight_result == -2:  # attacker lost, every piece was known before
                temp_reward = -int(self.certainty_multiplier * self.kill_reward)
        return temp_reward
    def goal_test(self, actions_possible, board, max_val):
        """
        Check the board for whether a flag has already been captured and return the winning
        game rewards; if not, check whether any actions are still possible and return True if
        none are left, False otherwise.
        :param actions_possible: list of moves
        :param board: numpy array (5, 5)
        :param max_val: boolean, whether this is a goal test for the maximizing player
        :return: boolean: reached terminal state, boolean: own team (True) or other team won (False)
        """
        flag_alive = [False, False]
        for pos, piece in np.ndenumerate(board):
            # type 0 is the flag
            if piece is not None and piece.type == 0:
                flag_alive[piece.team] = True
        if not flag_alive[self.other_team]:
            return True, True
        # if not flag_alive[self.team]:
        #     return True, False
        if not actions_possible:
            # print('cannot move anymore')
            if max_val:
                # the minmax agent is the one doing max_val, so if he can't move -> loss for him
                won = False
            else:
                won = True
            return True, won
        else:
            return False, None
    def get_terminal_reward(self, done, won, depth):
        """
        Reward for ending the game at a certain depth. If it ended because of a flag capture,
        reward with a depth-discounted winGameReward. If it ended because of the depth
        limitation, return 0.
        :param done: boolean, indicates whether the game ended
        :param won: boolean, indicates whether the game was won or lost
        :param depth: the depth at which the game ended
        :return: game end reward, float
        """
        if not done:
            return 0
        if won:
            terminal_reward = self.winGameReward
        else:
            terminal_reward = -self.winGameReward
        # scale by the (positive) win/kill ratio, so that the sign of
        # terminal_reward is preserved for losses as well
        return (
            terminal_reward
            * (depth + 1)
            / (self.max_depth + 1)
            * (self.winGameReward / self.kill_reward)
        )
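    # Worked example of the discount above: with winGameReward=100, kill_reward=10
    # and max_depth=4, a win found with 2 depth levels still remaining scores
    # 100 * (2 + 1) / (4 + 1) * (100 / 10) = 600, while the same win at depth 0
    # scores only 100 * 1/5 * 10 = 200, so wins reachable in fewer moves are
    # preferred.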
    def update_prob_by_fight(self, enemy_piece):
        """
        Update the information about the given piece after a fight occurred.
        :param enemy_piece: object of class Piece
        :return: change is in-place, no value returned
        """
        # (inferred) the fight reveals the piece, so its potential types
        # collapse to its actual type
        enemy_piece.potential_types = [enemy_piece.type]
    def update_prob_by_move(self, move, moving_piece):
        """
        Update the information about the given piece after it made the given move.
        :param move: tuple of position tuples
        :param moving_piece: object of class Piece
        :return: change is in-place, no value returned
        """
        move_dist = spatial.distance.cityblock(move[0], move[1])
        if move_dist > 1:
            # only type 2 can cover more than one square per move
            moving_piece.hidden = False
            moving_piece.potential_types = [2]  # piece is 2
        else:
            # a piece that moves cannot be one of the immobile types (0 or 11)
            immobile_enemy_types = [
                idx
                for idx, p_type in enumerate(moving_piece.potential_types)
                if p_type in [0, 11]
            ]
            moving_piece.potential_types = np.delete(
                moving_piece.potential_types, immobile_enemy_types
            )
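    # Example: spatial.distance.cityblock((0, 0), (0, 2)) == 2 (the Manhattan
    # distance), so a piece that covered two squares in one move must be the
    # only type with an unlimited move range (type 2) and is revealed as such
    # by update_prob_by_move.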
    def draw_consistent_enemy_setup(self, board):
        """
        Draw a setup of the enemy's pieces on the provided board that aligns with the current
        state of information about said pieces, then place them on the board. This is done via
        iterative random sampling until a consistent draw occurs. This draw may or may not
        represent the overall true distribution of the pieces.
        :param board: numpy array (5, 5)
        :return: board with the assigned enemy pieces in it
        """
        # get information about enemy pieces (how many, which alive, which types, and
        # indices in the assignment array)
        enemy_pieces = copy.deepcopy(self.ordered_opp_pieces)
        enemy_pieces_alive = [piece for piece in enemy_pieces if not piece.dead]
        tokens_alive = [piece.token for piece in enemy_pieces_alive]
        # redraw as long as the drawn assignment is not consistent with the current
        # knowledge about the pieces
        consistent = False
        sample = None
        while not consistent:
            # choose as many pieces randomly as there are enemy pieces alive
            sample = np.random.choice(tokens_alive, len(tokens_alive), replace=False)
            # while-loop break condition
            consistent = True
            for idx, piece in enumerate(enemy_pieces_alive):
                # if the drawn type doesn't fit the potential types of the current piece, redraw
                if sample[idx] not in piece.potential_types:
                    consistent = False
                    break
        # place this draw on the board by assigning the types and changing critical attributes
        for idx, piece in enumerate(enemy_pieces_alive):
            # mark the piece as guessed (only happens in non-real gameplay, i.e. planning)
            piece.guessed = not piece.hidden
            piece.type = sample[idx]
            if piece.type in [0, 11]:  # the immobile types
                piece.can_move = False
                piece.move_range = 0
            elif piece.type == 2:  # the type with unlimited move range
                piece.can_move = True
                piece.move_range = float("inf")
            else:
                piece.can_move = True
                piece.move_range = 1
            piece.hidden = False
            board[piece.position] = piece
        return board
    def undo_last_move(self, board):
        """
        Undo the last move in the memory. Return the updated board.
        :param board: numpy array (5, 5)
        :return: board
        """
        last_move = self.last_N_moves.pop_last()
        if last_move is None:
            raise ValueError("No last move to undo detected!")
        before_piece = self.pieces_last_N_Moves_beforePos.pop_last()
        board[last_move[0]] = before_piece
        # the piece at the 'before' position was the one that moved, so it needs its
        # last entry in the move history deleted
        before_piece.position = last_move[0]
        board[last_move[1]] = self.pieces_last_N_Moves_afterPos.pop_last()
        return board
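
# A minimal usage sketch (hedged: `state` and `logic` stand in for objects the
# surrounding project provides, and MinMax is assumed to inherit its board and
# bookkeeping plumbing from an agent base class defined elsewhere; none of
# these are defined in this module):
#
#   agent = MinMax(team=0)                   # depth=None -> adaptive max_depth
#   from_pos, to_pos = agent.decide_move(state, logic)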