#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 23 00:37:13 2018
@author: jack.lingheng.meng
"""
import logging
from datetime import datetime, date
from threading import Timer
import os
import time
from gym import spaces
import numpy as np
from Environment.LASEnv import LASEnv
from LASAgent.InternalEnvOfAgent import InternalEnvOfAgent
from LASAgent.InternalEnvOfCommunity import InternalEnvOfCommunity
# Logging
experiment_results_dir = os.path.join(os.path.abspath('..'), 'ROM_Experiment_results')
if not os.path.exists(experiment_results_dir):
    os.makedirs(experiment_results_dir)
logging.basicConfig(filename=os.path.join(experiment_results_dir,
                                          'ROM_experiment_' + datetime.now().strftime("%Y%m%d_%H%M%S") + '.log'),
                    level=logging.DEBUG,
                    format='%(asctime)s:%(levelname)s: %(message)s')
#######################################################################
#                 Instantiate LAS virtual environment                 #
#  TODO: This part is not needed when interacting with real exhibit   #
#######################################################################
# Instantiate LAS environment object
envLAS = LASEnv('127.0.0.1', 19997, reward_function_type = 'occupancy')
observation = envLAS.reset()
#######################################################################
#                        Instantiate LAS-Agent                        #
#######################################################################
# Note: 1. Set load_pretrained_agent_flag to "True" only when you have
#          and want to load a pretrained agent.
#       2. Keep the observation format unchanged if using a pretrained agent.
# TODO: Adam will provide the information, and we will define these variables.
#       In the real system there is no virtual environment, so we need to define:
#           1. observation_space: gym.spaces.Box object
#           2. observation_space_name: each entry is the name of a sensor
#           3. action_space: gym.spaces.Box object
#           4. action_space_name: each entry is the name of an actuator
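# For the real exhibit (no virtual environment), the two spaces would be
# built by hand. A minimal sketch, assuming 24 IR sensors reading in [0, 1]
# and 18 actuators commanded in [-1, 1] (both counts and ranges are
# hypothetical placeholders, not the exhibit's real layout):
#
#     observation_space = spaces.Box(low=0.0, high=1.0, shape=(24,), dtype=np.float32)
#     observation_space_name = ['IR_sensor_{}'.format(i) for i in range(24)]
#     action_space = spaces.Box(low=-1.0, high=1.0, shape=(18,), dtype=np.float32)
#     action_space_name = ['actuator_{}'.format(i) for i in range(18)]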
agent_name = 'LAS_Single_Agent'
observation_space = envLAS.observation_space
action_space = envLAS.action_space
observation_space_name = []
action_space_name = []
x_order_sensor_reading = 20
x_order_sensor_reading_sliding_window = 5
x_order_sensor_reading_preprocess_type = 'max_pool_sensory_readings'  # or: 'average_pool_sensory_readings', 'concatenate_sensory_readings'
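# The preprocess type controls how the last x_order_sensor_reading raw
# readings are collapsed into one model input. Illustrative sketch only
# (not the InternalEnvOfAgent implementation), for a readings array of
# shape (x_order, n_sensors):
#     max_pool:     np.max(readings, axis=0)   -> shape (n_sensors,)
#     average_pool: np.mean(readings, axis=0)  -> shape (n_sensors,)
#     concatenate:  readings.flatten()         -> shape (x_order * n_sensors,)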
occupancy_reward_type = 'IR_distance'
interaction_mode = 'real_interaction'
load_pretrained_agent_flag = True
single_agent = InternalEnvOfAgent(agent_name,
                                  observation_space,
                                  action_space,
                                  observation_space_name,
                                  action_space_name,
                                  x_order_sensor_reading,
                                  x_order_sensor_reading_sliding_window,
                                  x_order_sensor_reading_preprocess_type,
                                  occupancy_reward_type,
                                  interaction_mode,
                                  load_pretrained_agent_flag)
logging.info('Instantiate LAS-Agent done!')
#######################################################################
#                   Instantiate LAS-Agent-Community                   #
#######################################################################
# Note: 1. Set load_pretrained_agent_flag to "True" only when you have
#          and want to load a pretrained agent.
#       2. Keep the observation format unchanged if using a pretrained agent.
community_name = 'LAS_Agent_Community'
community_size = 3
observation_space = envLAS.observation_space
action_space = envLAS.action_space
observation_space_name = envLAS.observation_space_name
action_space_name = envLAS.action_space_name
x_order_sensor_reading = 20
x_order_sensor_reading_sliding_window = 5
x_order_sensor_reading_preprocess_type = 'max_pool_sensory_readings'  # or: 'average_pool_sensory_readings', 'concatenate_sensory_readings'
occupancy_reward_type = 'IR_distance'
interaction_mode = 'real_interaction'
load_pretrained_agent_flag = True
LAS_agent_community = InternalEnvOfCommunity(community_name,
                                             community_size,
                                             observation_space,
                                             action_space,
                                             observation_space_name,
                                             action_space_name,
                                             x_order_sensor_reading,
                                             x_order_sensor_reading_sliding_window,
                                             x_order_sensor_reading_preprocess_type,
                                             occupancy_reward_type,
                                             interaction_mode,
                                             load_pretrained_agent_flag)
logging.info('Instantiate LAS-Agent-Community done!')
#######################################################################
#                       Schedule two experiments                      #
# Note:
#   1. Initializing Single_Agent and Agent_Community takes about 10
#      minutes, so the master script should be started before 9:45am
#      every day.
#   2. Single_Agent.stop() takes about 3 minutes to save learned models.
#   3. Agent_Community.stop() takes about 10 minutes to save learned models.
# Problem:
#   We don't want visitors to feel the pause while learned models are saved.
# Solution:
#   To remove the time gap when switching behavior modes, use one thread
#   per interaction mode: when one interaction mode finishes, the next
#   mode starts interacting immediately, while the previous thread keeps
#   saving its learned models in the background.
#######################################################################
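# threading.Timer runs its function once, after `interval` seconds, on its
# own thread, which is what lets two modes overlap: the next mode's thread
# starts interacting while the previous thread is still inside agent.stop().
# Minimal standalone illustration (not part of the exhibit schedule):
#
#     from threading import Timer
#     t1 = Timer(interval=0.0, function=print, args=('mode 1 running',))
#     t2 = Timer(interval=5.0, function=print, args=('mode 2 running',))
#     t1.start(); t2.start()  # t2 fires 5 s later, independently of t1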
def interact_with_learning_agent(agent, end_time='143000'):
    """
    Note:
        When integrating with the master script, replace:
            1. envLAS._self_observe() -> get_observation()
            2. envLAS.step(action)    -> take_action(action)

    Parameters
    ----------
    agent: learning agent object
    end_time: str (in format %H%M%S)
        the end time of the interaction
    """
    logging.info('{}: Start interaction. Default End_time: {}'.format(agent.name, end_time))
    # Interact until end_time
    while not datetime.now().strftime("%H%M%S") > end_time:
        observation = envLAS._self_observe()  # TODO: replace with get_observation()
        take_action_flag, action = agent.feed_observation(observation)
        if take_action_flag:
            observation, _, _, _ = envLAS.step(action)  # TODO: replace with take_action(action)
    # Save learned models
    logging.info('{}: Interaction is done. Saving learned models...'.format(agent.name))
    agent.stop()
    logging.info('{}: Saving learned models done.'.format(agent.name))
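# Once integrated with the master script, the loop body above would use the
# master-script calls named in the TODOs instead of the virtual environment
# (the exact get_observation/take_action signatures are assumptions here):
#
#     observation = get_observation()
#     take_action_flag, action = agent.feed_observation(observation)
#     if take_action_flag:
#         take_action(action)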
def interact_with_prescribed_behavior(agent='prescribed_behavior', end_time='130000'):
    """
    TODO: Please put the prescribed behavior in this function.

    Parameters
    ----------
    agent: str
        unused; kept only to match the signature of interact_with_learning_agent
    end_time: str (in format %H%M%S)
        the end time of the interaction
    """
    logging.info('{}: Start interaction. Default End_time: {}'.format(agent, end_time))
    # Interact until end_time
    while not datetime.now().strftime("%H%M%S") > end_time:
        observation = envLAS._self_observe()  # TODO: replace with get_observation()
        action = envLAS.action_space.sample()  # TODO: replace with the prescribed behavior
        observation, _, _, _ = envLAS.step(action)  # TODO: replace with take_action(action)
    logging.info('{}: Interaction is done.'.format(agent))
def interaction_mode_scheduler(interaction_mode, agent,
                               start_time, end_time, schedule_start_time):
    """
    Parameters
    ----------
    interaction_mode: func
        the interaction function to schedule
    agent: depends on the interaction mode
        1. agent object: for a learning agent
        2. 'prescribed_behavior': for prescribed behavior
    start_time: str (in format %H%M%S)
    end_time: str (in format %H%M%S)
    schedule_start_time: datetime object

    Returns
    -------
    interaction_thread
        a delayed thread for an interaction mode, which will start at the given time.
    """
    start_datetime = datetime.strptime(date.today().strftime("%Y%m%d") + '-' + start_time,
                                       '%Y%m%d-%H%M%S')
    start_delay = (start_datetime - schedule_start_time).total_seconds()
    if start_delay < 0:
        logging.error('{} starts earlier than the scheduling time!'.format(interaction_mode.__name__))
    interaction_thread = Timer(interval=start_delay,
                               function=interaction_mode,
                               kwargs={'agent': agent,
                                       'end_time': end_time})
    return interaction_thread
# Get current time to calculate interaction start-time-delay
schedule_start_time = datetime.now()
# Schedule first experiment
# TODO: set start and end times to '130002' and '143000'
first_experiment_start_time = '175501' # format: %H%M%S e.g. 1:00pm is 130000
first_experiment_end_time = '180500' # format: %H%M%S e.g. 2:30pm is 143000
first_experiment_thread = interaction_mode_scheduler(interact_with_learning_agent,
                                                     single_agent,
                                                     first_experiment_start_time,
                                                     first_experiment_end_time,
                                                     schedule_start_time)
# Schedule second experiment
# TODO: set start and end times to '143002' and '160000'
second_experiment_start_time = '180501' # format: %H%M%S e.g. 2:30pm is 143000
second_experiment_end_time = '182500' # format: %H%M%S e.g. 4:00pm is 160000
second_experiment_thread = interaction_mode_scheduler(interact_with_learning_agent,
                                                      LAS_agent_community,
                                                      second_experiment_start_time,
                                                      second_experiment_end_time,
                                                      schedule_start_time)
# Schedule prescribed-behavior 1
# Note:
#   Make sure to leave a time gap of at least 10 minutes between the time
#   you start this script and the start time of the first interaction.
#   (This is because instantiating the learning agents takes around 8 minutes.)
# TODO: set start and end times to '093000' and '130000'
prescribed_behavior_start_time_1 = '175301' # format: %H%M%S e.g. 10:00am is 100000
prescribed_behavior_end_time_1 = '175500' # format: %H%M%S e.g. 1:00pm is 130000
prescribed_behavior_thread_1 = interaction_mode_scheduler(interact_with_prescribed_behavior,
                                                          'prescribed_behavior',
                                                          prescribed_behavior_start_time_1,
                                                          prescribed_behavior_end_time_1,
                                                          schedule_start_time)
# Schedule prescribed-behavior 2
# TODO: set start and end times to '160002' and '173000'
prescribed_behavior_start_time_2 = '182501' # format: %H%M%S e.g. 4:00pm is 160000
prescribed_behavior_end_time_2 = '182800' # format: %H%M%S e.g. 5:30pm is 173000
prescribed_behavior_thread_2 = interaction_mode_scheduler(interact_with_prescribed_behavior,
                                                          'prescribed_behavior',
                                                          prescribed_behavior_start_time_2,
                                                          prescribed_behavior_end_time_2,
                                                          schedule_start_time)
if __name__ == '__main__':
    # TODO: put initialization work for the master script in here
    # Schedule interaction with learning agents
    first_experiment_thread.start()
    logging.info('first_experiment_thread scheduled: {}-{}'.format(first_experiment_start_time, first_experiment_end_time))
    second_experiment_thread.start()
    logging.info('second_experiment_thread scheduled: {}-{}'.format(second_experiment_start_time, second_experiment_end_time))
    # Schedule interaction with prescribed behavior
    prescribed_behavior_thread_1.start()
    logging.info('prescribed_behavior_thread_1 scheduled: {}-{}'.format(prescribed_behavior_start_time_1, prescribed_behavior_end_time_1))
    prescribed_behavior_thread_2.start()
    logging.info('prescribed_behavior_thread_2 scheduled: {}-{}'.format(prescribed_behavior_start_time_2, prescribed_behavior_end_time_2))
    # Wait until all interactions are done, sleeping between checks so the
    # main thread does not busy-wait.
    while True:
        if not first_experiment_thread.is_alive() \
                and not second_experiment_thread.is_alive() \
                and not prescribed_behavior_thread_1.is_alive() \
                and not prescribed_behavior_thread_2.is_alive():
            logging.info('All interactions are done.')
            break
        time.sleep(10)
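    # A simpler equivalent is to block on each thread in turn, since
    # threading.Timer is a Thread subclass and supports join():
    #
    #     for thread in (first_experiment_thread, second_experiment_thread,
    #                    prescribed_behavior_thread_1, prescribed_behavior_thread_2):
    #         thread.join()
    #     logging.info('All interactions are done.')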