-
Notifications
You must be signed in to change notification settings - Fork 0
/
constants.py
65 lines (61 loc) · 1.85 KB
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Constants
---------
This file contains hyperparameters and constants. Every environment that
agents are trained on must have its own dictionary of parameters defined here.
"""
# Per-environment training settings, keyed by environment id.
# Keys documented in this file:
#   n_steps                   -- number of env steps
#   evaluation_frequency      -- the agent is evaluated once every this many episodes
#   n_neurons                 -- neuron count for each hidden layer, in order
#   learning_starts           -- number of steps played randomly at the start
#   random_process_parameters -- [mean, stddev]
HYPERPARAMS = {
    "BipedalWalker-v3": {
        "n_steps": 1_000_000,  # counted in env steps
        "evaluation_frequency": 100,  # episodes between two evaluations
        "actor_lr": 1e-4,
        "critic_lr": 1e-4,
        "gamma": 0.99,
        "tau": 1e-3,
        "minibatch": 128,
        "buffer_size": 1_000_000,
        "n_neurons": [400, 300],  # one entry per hidden layer
        "learning_starts": 20_000,  # random play for this many steps
        "random_process_parameters": [0, 0.1],  # [mean, stddev]
    },
    "Pendulum-v1": {
        "n_steps": 10_000,
        "evaluation_frequency": 25,
        "actor_lr": 1e-3,
        "critic_lr": 2e-3,
        "gamma": 0.99,
        "tau": 5e-3,
        "minibatch": 64,
        "buffer_size": 10_000,
        "n_neurons": [256, 256],
        "learning_starts": 500,
        "random_process_parameters": [0, 0.2],
    },
    "LunarLanderContinuous-v2": {
        "n_steps": 500_000,
        "evaluation_frequency": 50,
        "actor_lr": 1e-3,
        "critic_lr": 1e-3,
        "gamma": 0.99,
        "tau": 3e-3,
        "minibatch": 128,
        "buffer_size": 500_000,
        "n_neurons": [400, 300],
        "learning_starts": 10_000,
        "random_process_parameters": [0, 0.1],
    },
    "HalfCheetah-v4": {
        "n_steps": 1_000_000,
        "evaluation_frequency": 100,
        "actor_lr": 1e-4,
        "critic_lr": 1e-4,
        "gamma": 0.99,
        "tau": 1e-3,
        "minibatch": 128,
        "buffer_size": 1_000_000,
        "n_neurons": [400, 300],
        "learning_starts": 20_000,
        "random_process_parameters": [0, 0.1],
    },
}
# =============== END OF FILE ===============