1 Star 1 Fork 0

h505030475/pymarl3rd

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
记录程序运行流 11.31 KB
一键复制 编辑 原始数据 按行查看 历史
h505030475 提交于 2021-09-28 16:04 . before
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:172)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:54)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:221)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:54)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
log_stat (\home\fuqingxu\pymarl2\pymarl2src\utils\logging.py:28)
_log (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:214)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:206)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:177)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:54)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
self.args.batch_size_run 决定了并行环境的数量
# 并行环境 reset
reset (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:72)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:89)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# build input
_build_inputs (\home\fuqingxu\pymarl2\pymarl2src\controllers\basic_controller.py:71)
forward (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:21)
select_actions (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:16)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:107)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# 网络
forward (\home\fuqingxu\pymarl2\pymarl2src\modules\agents\n_rnn_agent.py:23)
forward (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:23)
select_actions (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:16)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:107)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# 按 epsilon-greedy 选择动作(以 epsilon 的概率选随机动作)
select_action (\home\fuqingxu\pymarl2\pymarl2src\components\action_selectors.py:164)
select_actions (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:17)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:107)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# EpsilonGreedyActionSelector 的初始化
__init__ (\home\fuqingxu\pymarl2\pymarl2src\components\action_selectors.py:156)
__init__ (\home\fuqingxu\pymarl2\pymarl2src\controllers\basic_controller.py:15)
__init__ (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:11)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:131)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:68)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:111)
# 更新训练缓存
update (\home\fuqingxu\pymarl2\pymarl2src\components\episode_buffer.py:88)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:118)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# Only send the actions to the env if it hasn't terminated
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:124)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# Receive data back for each unterminated env
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:147)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:190)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# insert_episode_batch(self, ep_batch):runner.run 返回后,在 run_sequential 中把采集到的 episode batch 插入 buffer(内部实现待确认)
insert_episode_batch (\home\fuqingxu\pymarl2\pymarl2src\components\episode_buffer.py:215)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:191)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# 每隔几个episode执行一次测试
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:89)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:217)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# TODO: ReplayBuffer 的工作方式尚未弄清,待查
# 训练
train (\home\fuqingxu\pymarl2\pymarl2src\learners\nq_learner.py:54)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:203)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
forward (\home\fuqingxu\pymarl2\pymarl2src\modules\mixers\nmix.py:32)
train (\home\fuqingxu\pymarl2\pymarl2src\learners\nq_learner.py:91)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:203)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
build_td_lambda_targets (\home\fuqingxu\pymarl2\pymarl2src\utils\rl_utils.py:8)
train (\home\fuqingxu\pymarl2\pymarl2src\learners\nq_learner.py:100)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:203)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# pytorch梯度反传
train (\home\fuqingxu\pymarl2\pymarl2src\learners\nq_learner.py:119)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:203)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:67)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:109)
# 策略主网络
forward (\home\fuqingxu\pymarl2\pymarl2src\modules\agents\n_rnn_agent.py:23)
_call_impl (\home\fuqingxu\.local\lib\python3.8\site-packages\torch\nn\modules\module.py:889)
forward (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:23)
select_actions (\home\fuqingxu\pymarl2\pymarl2src\controllers\n_controller.py:16)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:112)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:194)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:57)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
captured_function (\home\fuqingxu\.local\lib\python3.8\site-packages\sacred\config\captured_function.py:42)
__call__ (\home\fuqingxu\.local\lib\python3.8\site-packages\sacred\run.py:238)
run (\home\fuqingxu\.local\lib\python3.8\site-packages\sacred\experiment.py:276)
run_commandline (\home\fuqingxu\.local\lib\python3.8\site-packages\sacred\experiment.py:312)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:111)
_run_code (\usr\lib\python3.8\runpy.py:87)
_run_module_code (\usr\lib\python3.8\runpy.py:97)
run_path (\usr\lib\python3.8\runpy.py:265)
_run_code (\usr\lib\python3.8\runpy.py:87)
_run_module_as_main (\usr\lib\python3.8\runpy.py:194)
0:'./pymarl2src/main.py'
1:'--config=qmix' # pymarl2/config/algs/qmix.yaml has batch_size_run, epsilon_start, epsilon_finish, epsilon_anneal_time
mac: "n_mac"
agent: "n_rnn"
agent_output_type: q
learner: "nq_learner"
mixer: "qmix"
mixing_embed_dim: 32
hypernet_embed: 64
lr: 0.001 # Learning rate for agents
td_lambda: 0.6 # 0.3 for 6h_vs_8z
optimizer: 'adam'
q_lambda: False
2:'--env-config=sc2' # pymarl2/config/envs/sc2.yaml has env_args,env
3:'with'
4:'env_args.map_name=corridor'
python ./pymarl2src/main.py --config=qmix_increase_eps_time --env-config=sc2 with env_args.map_name=corridor
python ./pymarl2src/main.py --config=qmix_increase_eps_time --env-config=sc2 with env_args.map_name=corridor
python ./pymarl2src/main.py --config=qmix_decrease_eps_time --env-config=sc2 with env_args.map_name=corridor
一个batch的生命周期:
(1)初始化
__init__ (\home\fuqingxu\pymarl2\pymarl2src\components\episode_buffer.py:15)
reset (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:69)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:93)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:193)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:57)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:111)
(2)即刻载入reset获得的obs
reset (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:87)
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:93)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:193)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:57)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:111)
(3)用于动作决策
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:112)
(4)记录决策的动作
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:123)
(5)等待环境返回后,记录奖励和 terminated 状态
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:177)
(6)记录下一时刻的obs
run (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:183)
(7)循环,直到结束,这个batch被插入buffer
子进程的任务:
env_worker (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:233)
__init__ (\home\fuqingxu\pymarl2\pymarl2src\runners\parallel_runner.py:32)
run_sequential (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:88)
run (\home\fuqingxu\pymarl2\pymarl2src\run\run.py:57)
my_main (\home\fuqingxu\pymarl2\pymarl2src\main.py:35)
<module> (\home\fuqingxu\pymarl2\pymarl2src\main.py:111)
reward, terminated, env_info = env.step(actions)
# Return the observations, avail_actions and state to make the next action
state = env.get_state()
avail_actions = env.get_avail_actions()
obs = env.get_obs()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/hh505030475/pymarl3rd.git
[email protected]:hh505030475/pymarl3rd.git
hh505030475
pymarl3rd
pymarl3rd
master

搜索帮助