1 Star 0 Fork 24

happyhzq/future_agent

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
optimize_ppo.py 3.39 KB
一键复制 编辑 原始数据 按行查看 历史
邹吉华 提交于 2023-04-04 17:02 . 1.2.4
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import PPO
from training_env import TrainingEnv
from stable_baselines3.common.vec_env import SubprocVecEnv
import torch as th
import optuna
from stable_baselines3.common.evaluation import evaluate_policy
# Path for TensorBoard logs (not referenced elsewhere in this script).
TB_LOG_PATH = "../tb_log"

# Begin dates handed to TrainingEnv for every training worker environment.
TRAINING_BEGIN_TIME = [
    "2022-08-14", "2022-08-15",
    "2022-08-18", "2022-08-19", "2022-08-20", "2022-08-21", "2022-08-22",
    "2022-08-25", "2022-08-26", "2022-08-27", "2022-08-28", "2022-08-29",
    "2022-09-01", "2022-09-02", "2022-09-03", "2022-09-04",
]

# Begin dates handed to TrainingEnv for the post-training evaluation run.
# NOTE(review): "2022-08-29" is also listed in TRAINING_BEGIN_TIME, so the
# evaluation date is not held out from training -- confirm this is intended.
EVALUATE_BEGIN_TIME = ["2022-08-29"]

# PPO is trained on a vectorized environment; make_env() supplies the
# per-worker environment factories.
def make_env(rank, seed=0):
    """
    Build a factory for one monitored training environment.

    The returned callable takes no arguments, which is the form expected by
    ``SubprocVecEnv`` (one callable per worker process).

    :param rank: (int) index of the subprocess; added to ``seed`` so that
        every worker environment receives a different seed
    :param seed: (int) the initial seed for RNG
    :return: (callable) zero-argument function that creates, seeds and
        returns a ``Monitor``-wrapped ``TrainingEnv``
    """
    # Executed eagerly, when make_env() itself is called (not inside the
    # factory), so it runs in the calling process.
    set_random_seed(seed)

    def _init():
        monitored_env = Monitor(TrainingEnv(TRAINING_BEGIN_TIME))
        # NOTE(review): uses the ``env.seed()`` method; newer Gym/Gymnasium
        # releases seed through ``reset(seed=...)`` instead -- confirm against
        # the installed version.
        monitored_env.seed(seed + rank)
        return monitored_env

    return _init
def optimize_ppo(trial):
    """
    Sample one PPO hyperparameter configuration from an Optuna trial.

    The values are drawn in a fixed order: ``na_num``, one layer width per
    layer (parameters named ``"0"``, ``"1"``, ...), ``fn_index``, then
    ``n_steps``, ``gamma``, ``learning_rate``, ``clip_range`` and
    ``gae_lambda``.

    :param trial: Optuna trial (any object providing ``suggest_int``,
        ``suggest_categorical`` and ``suggest_float``) used to draw values
    :return: (dict) keyword arguments for ``PPO(...)``: ``n_steps``,
        ``gamma``, ``learning_rate``, ``clip_range``, ``gae_lambda`` and
        ``policy_kwargs`` (holding ``activation_fn`` and ``net_arch``)
    """
    layer_sizes = [32, 64, 128, 256, 512, 768, 1024]

    # Between 4 and 16 layers, each with an independently sampled width.
    na_num = trial.suggest_int('na_num', 4, 16)
    net_arch = [
        trial.suggest_categorical(str(i), layer_sizes) for i in range(na_num)
    ]

    # Catalogue of activation modules addressed by position. Only the indices
    # offered to 'fn_index' below (0 = ReLU, 4 = Sigmoid, 6 = Tanh) can be
    # selected; the order must stay fixed so stored 'fn_index' values keep
    # their meaning.
    all_fn = [
        th.nn.ReLU,
        th.nn.RReLU,
        th.nn.Hardtanh,
        th.nn.ReLU6,
        th.nn.Sigmoid,
        th.nn.Hardsigmoid,
        th.nn.Tanh,
        th.nn.SiLU,
        th.nn.Mish,
        th.nn.Hardswish,
        th.nn.ELU,
        th.nn.CELU,
        th.nn.SELU,
        th.nn.GELU,
        th.nn.Hardshrink,
        th.nn.LeakyReLU,
        th.nn.LogSigmoid,
        th.nn.Softplus,
        th.nn.Softshrink,
        th.nn.PReLU,
        th.nn.Softsign,
        th.nn.Tanhshrink,
        th.nn.Softmin,
        th.nn.Softmax,
        th.nn.LogSoftmax,
    ]
    fn_index = trial.suggest_categorical('fn_index', [0, 4, 6])

    # suggest_float replaces the deprecated suggest_loguniform /
    # suggest_uniform: log=True samples log-uniformly, the default samples
    # uniformly, over the same [low, high] ranges as before.
    return {
        'n_steps': trial.suggest_categorical(
            "n_steps", [2048, 3072, 4096, 8192]),
        'gamma': trial.suggest_float('gamma', 0.8, 0.99, log=True),
        'learning_rate': trial.suggest_float(
            'learning_rate', 1e-5, 1e-4, log=True),
        'clip_range': trial.suggest_float('clip_range', 0.1, 0.3),
        'gae_lambda': trial.suggest_float('gae_lambda', 0.8, 0.99),
        'policy_kwargs': dict(
            activation_fn=all_fn[fn_index],
            net_arch=net_arch,
        ),
    }
def optimize_agent(trial):
    """
    Optuna objective: train PPO with sampled hyperparameters and score it.

    Builds a 64-process ``SubprocVecEnv``, trains a ``PPO('MlpPolicy')``
    model for ``total_timesteps=100000`` using the parameters from
    ``optimize_ppo(trial)``, evaluates it on a ``TrainingEnv`` created from
    ``EVALUATE_BEGIN_TIME`` and saves the model to ``./model/trial_<n>``.

    :param trial: Optuna trial supplying hyperparameters and ``trial.number``
    :return: mean evaluation reward, or ``-10000`` if anything in the trial
        raised (the error is printed and the study continues)
    """
    num_cpu = 64  # Number of processes to use
    env = None
    eval_env = None
    try:
        # Create the vectorized environment
        env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
        model_params = optimize_ppo(trial)
        model = PPO('MlpPolicy', env, **model_params)
        # NOTE(review): one PPO rollout collects n_steps * n_envs transitions;
        # with 64 workers and n_steps >= 2048 that is >= 131072, which already
        # exceeds total_timesteps -- so training presumably stops after a
        # single rollout/update cycle. Confirm this budget is intended.
        model.learn(total_timesteps=100000)
        eval_env = Monitor(TrainingEnv(EVALUATE_BEGIN_TIME))
        mean_reward, _ = evaluate_policy(model, eval_env)
        print("mean_reward", mean_reward)
        model.save('./model/trial_{}'.format(trial.number))
        return mean_reward
    except Exception as e:
        # Deliberate best-effort: a failed trial is reported with a very low
        # score instead of aborting the whole study.
        print(e)
        return -10000
    finally:
        # Always release the worker processes and the evaluation environment;
        # otherwise every trial leaves its subprocesses running. Either handle
        # may still be None if the failure happened before it was created.
        for handle in (eval_env, env):
            if handle is None:
                continue
            try:
                handle.close()
            except Exception as close_error:
                # Cleanup problems must not replace the trial's result.
                print(close_error)
if __name__ == '__main__':
    # Hyperparameter search entry point: run 100 trials of optimize_agent and
    # keep the parameters whose returned objective value is largest.
    tuning_study = optuna.create_study(direction='maximize')
    tuning_study.optimize(
        optimize_agent,
        n_trials=100,
        gc_after_trial=True,
    )
    print(tuning_study.best_params)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/happyhzq/future_agent.git
[email protected]:happyhzq/future_agent.git
happyhzq
future_agent
future_agent
master

搜索帮助