-
Notifications
You must be signed in to change notification settings - Fork 0
/
rewards.py
98 lines (93 loc) · 3.11 KB
/
rewards.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import math
# Base reward for every reward_type, keyed by reward_reason.
_BASE_REWARDS = {
    # type 1: baseline rewards
    1: {"DEFAULT": 0, "DEAD": -2, "STUCK": -2, "SCORED": 3},
    # type 2: discourages the agent from standing in one place
    2: {"DEFAULT": -1, "DEAD": -2, "STUCK": -2, "SCORED": 3},
    # type 3: type 2 plus spring / monster adjustments
    3: {"DEFAULT": -1, "DEAD": -2, "STUCK": -2, "SCORED": 3},
    # type 4: type 3 plus a score-dependent (dynamic) SCORED reward
    4: {"DEFAULT": -1, "DEAD": -2, "STUCK": -2, "SCORED": 3},
    # type 5: type 4 without the penalty for scoring no points
    5: {"DEFAULT": 0, "DEAD": -2, "STUCK": -2, "SCORED": 3},
    # type 6: type 5 with a high penalty for dying / getting stuck
    6: {"DEFAULT": 0, "DEAD": -20, "STUCK": -20, "SCORED": 3},
}

# Reward types that apply the spring (+3) and monster (-4) adjustments.
_TOUCH_AWARE_TYPES = frozenset((3, 4, 5, 6))

# Reward types whose SCORED reward is increased by log(score).
_LOG_SCORE_TYPES = frozenset((4, 5, 6))


def formulate_reward(reward_type, reward_reason, spring_touch=False, monster_touch=False, score=0):
    """
    Compute the reward value to assign to the agent.

    - Params
        - reward_type: reward scheme to use (1-6; 1 is the baseline)
        - reward_reason: reason for calling the reward function, one of
          "DEFAULT", "DEAD", "STUCK" or "SCORED"
        - spring_touch: was a spring touched (bool); used by types 3-6
        - monster_touch: was a monster touched (bool); used by types 3-6
        - score: current score; for types 4-6 a "SCORED" reward is
          increased by math.log(score) when score is positive
    - Returns:
        - The reward value for the given type and reason, or None when
          reward_type or reward_reason is not recognised.
    """
    reason_table = _BASE_REWARDS.get(reward_type)
    if reason_table is None:
        return None

    reward = reason_table.get(reward_reason)
    if reward is None:
        # Unknown reason: there is no base value to adjust, so report "no
        # reward" instead of failing on None += ... in the touch handling.
        return None

    # math.log is undefined for score <= 0 (and score defaults to 0), so the
    # dynamic bonus is only added once the score is positive.
    if reward_type in _LOG_SCORE_TYPES and reward_reason == "SCORED" and score > 0:
        reward += math.log(score)

    # NOTE(review): the touch adjustments are applied for every reason, not
    # only "SCORED" -- confirm this matches the intended reward design.
    if reward_type in _TOUCH_AWARE_TYPES:
        if spring_touch:
            reward += 3
        if monster_touch:
            reward -= 4

    return reward