-
Notifications
You must be signed in to change notification settings - Fork 0
/
FlowData_create.py
134 lines (121 loc) · 5.49 KB
/
FlowData_create.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
'''
@Author: PangAoyu
@Date: 2023-02-15 14:33:49
@Description: Train the RL model, using multiple parallel environments during training
@LastEditTime: 2023-02-24 23:20:10
'''
import os
import argparse
import torch
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CallbackList, EvalCallback, CheckpointCallback
from stable_baselines3.common.vec_env import SubprocVecEnv, VecNormalize
from aiolos.utils.get_abs_path import getAbsPath
from aiolos.trafficLog.initLog import init_logging
pathConvert = getAbsPath(__file__)
from env import makeENV
from models import scnn, ernn, eattention,ecnn, inference, flowdata
from create_params import create_params
from utils.lr_schedule import linear_schedule
from utils.env_normalize import VecNormalizeCallback, VecBestNormalizeCallback
def experiment(
    net_name, net_env, n_stack, n_delay, model_name, num_cpus
):
    """Train a PPO traffic-signal model using multiple parallel environments.

    Args:
        net_name: Network file name (e.g. '4phases.net.xml').
        net_env: Scenario/environment name (e.g. 'train_four_345').
        n_stack: Number of stacked observation frames.
        n_delay: Observation delay in steps.
        model_name: Feature-extractor key; one of 'scnn', 'ernn',
            'eattention', 'ecnn', 'inference', 'flowdata'.
        num_cpus: Number of parallel training environments.

    Raises:
        AssertionError: If ``model_name`` is not a known extractor key.
    """
    assert model_name in ['scnn', 'ernn', 'eattention', 'ecnn', 'inference', 'flowdata'], f'Model name error, {model_name}'
    # args
    N_STACK = n_stack  # number of stacked frames
    N_DELAY = n_delay  # observation delay
    NUM_CPUS = num_cpus
    EVAL_FREQ = 2000  # one episode is roughly 700 interactions
    SAVE_FREQ = EVAL_FREQ * 2  # checkpoint frequency
    FLOWDATA_PATH_root = pathConvert('./FlowData/')
    FLOWDATA_PATH = pathConvert(f'./FlowData/{net_env}_{net_name}_{N_STACK}_{N_DELAY}.csv')  # CSV where flow data is stored
    MODEL_PATH = pathConvert(f'./results/models/{model_name}/{net_env}_{net_name}_{N_STACK}_{N_DELAY}/')
    LOG_PATH = pathConvert(f'./results/log/{model_name}/{net_env}_{net_name}_{N_STACK}_{N_DELAY}/')  # simulation logs
    TENSORBOARD_LOG_DIR = pathConvert(f'./results/tensorboard_logs/{model_name}_{net_env}_{net_name}/')
    # exist_ok=True replaces the racy check-then-create (os.path.exists + makedirs) pattern
    for _directory in (MODEL_PATH, TENSORBOARD_LOG_DIR, LOG_PATH, FLOWDATA_PATH_root):
        os.makedirs(_directory, exist_ok=True)
    train_params = create_params(
        is_eval=False,
        N_DELAY=N_DELAY, N_STACK=N_STACK, LOG_PATH=LOG_PATH, net_env=net_env, net_name=net_name,
    )
    eval_params = create_params(
        is_eval=True,
        N_DELAY=N_DELAY, N_STACK=N_STACK, LOG_PATH=LOG_PATH, net_env=net_env, net_name=net_name,
    )
    # The environments for training (NUM_CPUS parallel simulations)
    env = SubprocVecEnv([makeENV.make_env(env_index=f'{N_STACK}_{N_DELAY}_{i}', **train_params) for i in range(NUM_CPUS)])
    env = VecNormalize(env, norm_obs=False, norm_reward=False)  # no normalization: keep the raw features
    # A single environment for evaluation
    eval_env = SubprocVecEnv([makeENV.make_env(env_index=f'evaluate_{N_STACK}_{N_DELAY}', **eval_params)])
    eval_env = VecNormalize(eval_env, norm_obs=False, norm_reward=False)  # no normalization: keep the raw features
    eval_env.training = False  # do not update running statistics during evaluation
    eval_env.norm_reward = False
    # ########
    # callback
    # ########
    save_vec_normalize = VecBestNormalizeCallback(save_freq=1, save_path=MODEL_PATH)
    eval_callback = EvalCallback(
        eval_env,  # a dedicated eval env makes evaluation more stable
        eval_freq=EVAL_FREQ,
        best_model_save_path=MODEL_PATH,
        callback_on_new_best=save_vec_normalize,
        verbose=1,
    )  # saves the best model
    checkpoint_callback = CheckpointCallback(
        save_freq=SAVE_FREQ,
        save_path=MODEL_PATH,
    )  # periodic model checkpoints
    vec_normalize_callback = VecNormalizeCallback(
        save_freq=SAVE_FREQ,
        save_path=MODEL_PATH,
    )  # saves the env normalization statistics
    callback_list = CallbackList([eval_callback, checkpoint_callback, vec_normalize_callback])
    # ###########
    # start train
    # ###########
    feature_extract = {
        'scnn': scnn.SCNN,
        'ernn': ernn.ERNN,
        'eattention': eattention.EAttention,
        'ecnn': ecnn.ECNN,
        'inference': inference.Inference,
        'flowdata': flowdata.FlowData,
    }
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    policy_kwargs = dict(
        features_extractor_class=feature_extract[model_name],
        features_extractor_kwargs=dict(features_dim=32, FLOWDATA_PATH=FLOWDATA_PATH),  # features_dim: dimension of the extracted features
    )
    model = PPO(
        "MlpPolicy", env, verbose=True,
        policy_kwargs=policy_kwargs, learning_rate=linear_schedule(3e-4),
        tensorboard_log=TENSORBOARD_LOG_DIR, device=device,
    )
    # total_timesteps must be an int (1e5 is a float); tb_log_name names the TensorBoard run
    model.learn(total_timesteps=100_000, tb_log_name=f'{N_STACK}_{N_DELAY}', callback=callback_list)
    model.policy.features_extractor.f.close()  # close the flow-data CSV file
    # #########
    # save env
    # #########
    # os.path.join avoids the double slash produced by f'{MODEL_PATH}/...' (MODEL_PATH ends with '/')
    env.save(os.path.join(MODEL_PATH, 'vec_normalize.pkl'))
if __name__ == '__main__':
    # Set up application logging before parsing arguments or training.
    init_logging(log_path=pathConvert('./log'), log_level=0)

    # Command-line interface: (flag, type, default) triples.
    cli = argparse.ArgumentParser()
    for flag, arg_type, default in (
        ('--stack', int, 4),
        ('--delay', int, 0),
        ('--cpus', int, 1),  # number of simultaneous simulations
        ('--net_env', str, 'train_four_345'),
        ('--net_name', str, '4phases.net.xml'),
        ('--model_name', str, 'flowdata'),
    ):
        cli.add_argument(flag, type=arg_type, default=default)
    options = cli.parse_args()

    experiment(
        net_env=options.net_env,
        net_name=options.net_name,
        n_stack=options.stack,
        n_delay=options.delay,
        model_name=options.model_name,
        num_cpus=options.cpus,
    )