1 Star 0 Fork 24

七龙珠/future_agent

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
learn_sac.py 2.20 KB
一键复制 编辑 原始数据 按行查看 历史
邹吉华 提交于 2023-04-04 17:02 . 1.2.4
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import SAC
from training_env import TrainingEnv
from save_model import SaveModelCallback
from stable_baselines3.common.vec_env import VecFrameStack,SubprocVecEnv
import torch as th
from stable_baselines3.common.evaluation import evaluate_policy
# Directory handed to SAC as its ``tensorboard_log`` location.
TB_LOG_PATH = "../tb_log"

# Base path used for the Monitor logs and given to SaveModelCallback.
MODEL_PATH = "./model/sac"

# Total number of timesteps requested from ``model.learn``.
LEARN_TIMES = 2_000_000

# Date strings passed to TrainingEnv.
# NOTE(review): presumably the start dates of the sessions to train on —
# confirm against TrainingEnv.
TRAINING_BEGIN_TIME = [
    "2022-08-14",
    "2022-08-15",
    "2022-08-18",
    "2022-08-19",
    "2022-08-20",
    "2022-08-21",
    "2022-08-22",
    "2022-08-25",
    "2022-08-26",
    "2022-08-27",
    "2022-08-28",
    "2022-08-29",
    "2022-09-01",
    "2022-09-02",
    "2022-09-03",
    "2022-09-04",
]
# The algorithms require a vectorized environment to run
def make_env(rank, seed=0):
    """
    Build a factory that creates one monitored training environment.

    The returned callable takes no arguments; this script passes a list of
    such callables to ``SubprocVecEnv``.

    :param rank: (int) index of the subprocess; offsets the env seed and
        names the Monitor log file of that worker
    :param seed: (int) the initial seed for RNG
    :return: (callable) zero-argument function returning the wrapped env
    """
    import os  # local import so the block does not depend on file-level imports

    # Give every worker its own Monitor file under MODEL_PATH. Passing the
    # bare MODEL_PATH made all workers open and write the same CSV at once.
    monitor_file = os.path.join(MODEL_PATH, str(rank))

    def _init():
        env = Monitor(TrainingEnv(TRAINING_BEGIN_TIME), monitor_file)
        # Distinct seed per worker so the environments do not share one seed.
        env.seed(seed + rank)
        return env

    set_random_seed(seed)
    return _init
def optimize_params():
    """
    Return the fixed keyword arguments used to configure the SAC model.

    The result is a new dict on every call with the keys ``gamma``,
    ``learning_rate`` and ``policy_kwargs``; the latter holds the activation
    function and the list of layer widths for the policy network.

    NOTE(review): the values look like the output of an earlier
    hyperparameter search — confirm their origin before changing them.

    :return: (dict) keyword arguments for the SAC constructor
    """
    layer_widths = [508, 494, 865, 754, 417, 798, 799, 343]
    network_config = {
        'activation_fn': th.nn.ReLU,
        'net_arch': layer_widths,
    }
    hyperparams = {
        'gamma': 0.8595542347091839,
        'learning_rate': 1.8359176676795943e-05,
        'policy_kwargs': network_config,
    }
    return hyperparams
if __name__ == '__main__':
    # Entry point: train SAC on the vectorized environment, save the model,
    # then report the evaluation reward.
    num_cpu = 128  # Number of processes to use

    # Create the vectorized environment: one factory per worker process.
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        model_params = optimize_params()
        model = SAC(
            'MlpPolicy',
            env,
            verbose=1,
            tensorboard_log=TB_LOG_PATH,
            **model_params
        )
        model.learn(
            total_timesteps=LEARN_TIMES,
            callback=SaveModelCallback(check_freq=4096, path=MODEL_PATH, env=env),
        )
        # Save before evaluating so that a failure during evaluation cannot
        # lose the final trained model.
        model.save("sac_stock")

        mean_reward, std_reward = evaluate_policy(model, env)
        print(f"{mean_reward} {std_reward}")
    finally:
        # Shut the worker processes down even if training or evaluation raises.
        env.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/kingjinlong/future_agent.git
[email protected]:kingjinlong/future_agent.git
kingjinlong
future_agent
future_agent
master

搜索帮助