[{"title":"( 72 个子文件 3.92MB ) 强化学习算法:此存储库包含大多数基于pytorch实现的经典深度强化学习算法,包括-DQN,DDQN,Dualling Network,DDPG,SAC,A2C,PPO,TRPO。 (更多算法仍在进行中)-源码","children":[{"title":"reinforcement-learning-algorithms-master","children":[{"title":"figures","children":[{"title":"hopper.gif <span style='color:#111;'> 1.79MB </span>","children":null,"spread":false},{"title":"06_sac.png <span style='color:#111;'> 135.70KB </span>","children":null,"spread":false},{"title":"04_trpo.png <span style='color:#111;'> 141.83KB </span>","children":null,"spread":false},{"title":"01_dqn.png <span style='color:#111;'> 233.10KB </span>","children":null,"spread":false},{"title":"05_ppo.png <span style='color:#111;'> 130.17KB </span>","children":null,"spread":false},{"title":"03_a2c.png <span style='color:#111;'> 164.86KB </span>","children":null,"spread":false},{"title":"breakout.gif <span style='color:#111;'> 451.78KB </span>","children":null,"spread":false},{"title":"logo.png <span style='color:#111;'> 12.62KB </span>","children":null,"spread":false},{"title":"bipedal.gif <span style='color:#111;'> 815.07KB </span>","children":null,"spread":false},{"title":"02_ddpg.png <span style='color:#111;'> 135.88KB </span>","children":null,"spread":false}],"spread":true},{"title":"rl_utils","children":[{"title":"mpi_utils","children":[{"title":"utils.py <span style='color:#111;'> 1.39KB </span>","children":null,"spread":false},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"normalizer.py <span style='color:#111;'> 2.71KB </span>","children":null,"spread":false}],"spread":true},{"title":"running_filter","children":[{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"running_filter.py <span style='color:#111;'> 1.67KB </span>","children":null,"spread":false}],"spread":true},{"title":"logger","children":[{"title":"bench.py <span style='color:#111;'> 5.57KB </span>","children":null,"spread":false},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"logger.py <span style='color:#111;'> 14.46KB </span>","children":null,"spread":false},{"title":"plot.py <span style='color:#111;'> 3.87KB </span>","children":null,"spread":false}],"spread":true},{"title":"experience_replay","children":[{"title":"experience_replay.py <span style='color:#111;'> 1.35KB </span>","children":null,"spread":false}],"spread":true},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"env_wrapper","children":[{"title":"create_env.py <span style='color:#111;'> 2.19KB </span>","children":null,"spread":false},{"title":"atari_wrapper.py <span style='color:#111;'> 10.09KB </span>","children":null,"spread":false},{"title":"multi_envs_wrapper.py <span style='color:#111;'> 3.98KB </span>","children":null,"spread":false},{"title":"__init__.py <span style='color:#111;'> 5.74KB </span>","children":null,"spread":false},{"title":"frame_stack.py <span style='color:#111;'> 1.13KB </span>","children":null,"spread":false}],"spread":true},{"title":"seeds","children":[{"title":"seeds.py <span style='color:#111;'> 407B </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"rl_algorithms","children":[{"title":"ddpg","children":[{"title":"ddpg_agent.py <span style='color:#111;'> 8.63KB </span>","children":null,"spread":false},{"title":"train.py <span style='color:#111;'> 717B </span>","children":null,"spread":false},{"title":"arguments.py <span style='color:#111;'> 2.06KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 686B </span>","children":null,"spread":false},{"title":"models.py <span style='color:#111;'> 950B </span>","children":null,"spread":false},{"title":"demo.py <span style='color:#111;'> 1.48KB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 354B </span>","children":null,"spread":false}],"spread":true},{"title":"dqn_algos","children":[{"title":"train.py <span style='color:#111;'> 589B </span>","children":null,"spread":false},{"title":"dqn_agent.py <span style='color:#111;'> 5.51KB </span>","children":null,"spread":false},{"title":"arguments.py <span style='color:#111;'> 2.19KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 1.58KB </span>","children":null,"spread":false},{"title":"models.py <span style='color:#111;'> 2.54KB </span>","children":null,"spread":false},{"title":"demo.py <span style='color:#111;'> 1.11KB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 437B </span>","children":null,"spread":false}],"spread":true},{"title":"ppo","children":[{"title":"train.py <span style='color:#111;'> 757B </span>","children":null,"spread":false},{"title":"arguments.py <span style='color:#111;'> 2.22KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 1.34KB </span>","children":null,"spread":false},{"title":"models.py <span style='color:#111;'> 3.82KB </span>","children":null,"spread":false},{"title":"demo.py <span style='color:#111;'> 2.58KB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 754B </span>","children":null,"spread":false},{"title":"ppo_agent.py <span style='color:#111;'> 10.88KB </span>","children":null,"spread":false}],"spread":true},{"title":"a2c","children":[{"title":"a2c_agent.py <span style='color:#111;'> 6.22KB </span>","children":null,"spread":false},{"title":"train.py <span style='color:#111;'> 612B </span>","children":null,"spread":false},{"title":"arguments.py <span style='color:#111;'> 1.84KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 749B </span>","children":null,"spread":false},{"title":"models.py <span style='color:#111;'> 1.91KB </span>","children":null,"spread":false},{"title":"demo.py <span style='color:#111;'> 1.17KB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 269B </span>","children":null,"spread":false}],"spread":true},{"title":"sac","children":[{"title":"train.py <span style='color:#111;'> 450B </span>","children":null,"spread":false},{"title":"arguments.py <span style='color:#111;'> 3.01KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 2.77KB </span>","children":null,"spread":false},{"title":"models.py <span style='color:#111;'> 1.70KB </span>","children":null,"spread":false},{"title":"sac_agent.py <span style='color:#111;'> 10.62KB </span>","children":null,"spread":false},{"title":"demo.py <span style='color:#111;'> 1.40KB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 268B </span>","children":null,"spread":false}],"spread":true},{"title":"trpo","children":[{"title":"train.py <span style='color:#111;'> 461B </span>","children":null,"spread":false},{"title":"arguments.py <span style='color:#111;'> 1.49KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 1.98KB </span>","children":null,"spread":false},{"title":"models.py <span style='color:#111;'> 1.34KB </span>","children":null,"spread":false},{"title":"demo.py <span style='color:#111;'> 1.35KB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 261B </span>","children":null,"spread":false},{"title":"trpo_agent.py <span style='color:#111;'> 9.08KB </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"setup.py <span style='color:#111;'> 275B </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 5.96KB </span>","children":null,"spread":false},{"title":".gitignore <span style='color:#111;'> 1.25KB </span>","children":null,"spread":false}],"spread":true}],"spread":true}]