# test_part5.py -- 36 lines (28 loc), 1.81 KB
# Forked from fcarsten/tic-tac-toe.
import tensorflow as tf
import matplotlib.pyplot as plt
from util import evaluate_players
from tic_tac_toe.TFSessionManager import TFSessionManager
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.EGreedyNNQPlayer import EGreedyNNQPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.ExpDoubleDuelQPlayer import ExpDoubleDuelQPlayer
# Experiment script: pit a neural-network Q-learning player against a fixed
# opponent via util.evaluate_players and plot the four series it returns.

# Start from an empty default graph so ops left over from a previous run in
# the same interpreter do not end up in this experiment's graph.
tf.reset_default_graph()

# The learning agent under test. The commented-out lines are alternative
# set-ups kept for reference; only one `nnplayer` should be active at a time.
nnplayer = ExpDoubleDuelQPlayer("QLearner1")  # , win_value=100.0, loss_value=-100.0)
# nn2player = EGreedyNNQPlayer("QLearner2", win_value=100.0, loss_value=-100.0)
# nnplayer = EGreedyNNQPlayer("QLearner1")#, learning_rate=0.001, win_value=10.0, loss_value=-10.0)
# nn2player = EGreedyNNQPlayer("QLearner2")#, learning_rate=0.001, win_value=10.0, loss_value=-10.0)

# Candidate opponents; only the one passed to evaluate_players below is used.
mm_player = MinMaxAgent()
rndplayer = RandomPlayer()
rm_player = RndMinMaxAgent()

# The players are constructed before the session is initialised so that any
# variables they register are covered by the global initializer.
# NOTE(review): assumes the player constructors create their TF variables
# eagerly -- confirm against the player classes.
session = tf.Session()
TFSessionManager.set_session(session)
try:
    TFSessionManager.get_session().run(tf.global_variables_initializer())

    # Alternative match-ups, kept for reference:
    # game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer, num_battles=10000) #, num_battles = 20)
    # game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer) #, num_battles = 20)
    # game_number, p1_wins, p2_wins, draws = evaluate_players( mm_player, nnplayer, num_battles=300) # , num_battles = 20)
    game_number, p1_wins, p2_wins, draws = evaluate_players(
        rm_player, nnplayer, num_battles=300)  # , num_battles = 20)
    # game_number, p1_wins, p2_wins, draws = evaluate_players(nnplayer, rndplayer, num_battles=100) # , num_battles = 20)
    # game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nn2player, num_battles=100) # , num_battles = 20)

    # Line colours: draws = red, player 1 wins = green, player 2 wins = blue,
    # all plotted against game_number on the x axis.
    plt.plot(game_number, draws, 'r-',
             game_number, p1_wins, 'g-',
             game_number, p2_wins, 'b-')
    plt.show()
finally:
    # Always detach the session from the manager and release its resources,
    # even when the evaluation or the plotting fails part-way through.
    TFSessionManager.set_session(None)
    session.close()