[{"title":"( 42 个子文件 391KB ) DeepReinforcementLearning:深度RL实施。 在pytorch中实现的DQN,SAC,DDPG,TD3,PPO和VPG。 经过测试的环境:LunarLander-v2和Pendulum-v0-源码","children":[{"title":"DeepReinforcementLearning-main","children":[{"title":"td3.py <span style='color:#111;'> 4.28KB </span>","children":null,"spread":false},{"title":".ipynb_checkpoints","children":[{"title":"test_and_intial_Experimentation-checkpoint.ipynb <span style='color:#111;'> 72B </span>","children":null,"spread":false},{"title":"Policy Gradient Methods-checkpoint.ipynb <span style='color:#111;'> 13.08KB </span>","children":null,"spread":false}],"spread":true},{"title":"RLUtils","children":[{"title":"__init__.py <span style='color:#111;'> 21B </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 3.37KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"utils.cpython-37.pyc <span style='color:#111;'> 3.93KB </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 179B </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"SoftActorCritic.py <span style='color:#111;'> 3.47KB </span>","children":null,"spread":false},{"title":"Policy Gradient Methods.ipynb <span style='color:#111;'> 13.07KB </span>","children":null,"spread":false},{"title":"Readme.md <span style='color:#111;'> 2.43KB </span>","children":null,"spread":false},{"title":".idea","children":[{"title":".gitignore <span style='color:#111;'> 47B </span>","children":null,"spread":false},{"title":"misc.xml <span style='color:#111;'> 292B </span>","children":null,"spread":false},{"title":"vcs.xml <span style='color:#111;'> 180B </span>","children":null,"spread":false},{"title":"inspectionProfiles","children":[{"title":"Project_Default.xml <span style='color:#111;'> 659B </span>","children":null,"spread":false},{"title":"profiles_settings.xml <span style='color:#111;'> 174B </span>","children":null,"spread":false}],"spread":true},{"title":"modules.xml <span style='color:#111;'> 294B </span>","children":null,"spread":false},{"title":"ReinforcementLearning.iml <span style='color:#111;'> 317B </span>","children":null,"spread":false}],"spread":true},{"title":"ppo_clip.py <span style='color:#111;'> 4.45KB </span>","children":null,"spread":false},{"title":"ddpg.py <span style='color:#111;'> 9.33KB </span>","children":null,"spread":false},{"title":"agents","children":[{"title":"__init__.py <span style='color:#111;'> 57B </span>","children":null,"spread":false},{"title":"agent.py <span style='color:#111;'> 125B </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 225B </span>","children":null,"spread":false},{"title":"agent.cpython-37.pyc <span style='color:#111;'> 560B </span>","children":null,"spread":false}],"spread":true},{"title":"ActorCriticAgents","children":[{"title":"__init__.py <span style='color:#111;'> 63B </span>","children":null,"spread":false},{"title":"PPO_clip_agent.py <span style='color:#111;'> 11.10KB </span>","children":null,"spread":false},{"title":"td3_agent.py <span style='color:#111;'> 6.62KB </span>","children":null,"spread":false},{"title":"soft_Actor_critic_Agent.py <span style='color:#111;'> 7.01KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"soft_Actor_critic_Agent.cpython-37.pyc <span style='color:#111;'> 5.97KB </span>","children":null,"spread":false},{"title":"td3_agent.cpython-37.pyc <span style='color:#111;'> 5.67KB </span>","children":null,"spread":false},{"title":"PPO_clip_agent.cpython-37.pyc <span style='color:#111;'> 8.49KB </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 235B </span>","children":null,"spread":false}],"spread":false}],"spread":false},{"title":"MLPAgent.py <span style='color:#111;'> 0B </span>","children":null,"spread":false}],"spread":true},{"title":"figures","children":[{"title":"PPO_MountainCarContinuous-v0_rewards.png <span style='color:#111;'> 22.09KB </span>","children":null,"spread":false},{"title":"DQN_Lunar_lander_losses.png <span style='color:#111;'> 38.10KB </span>","children":null,"spread":false},{"title":"VPG_LunarLander-v2_rewards.png <span style='color:#111;'> 37.66KB </span>","children":null,"spread":false},{"title":"SAC_Pendulum-v0_rewards.png <span style='color:#111;'> 50.90KB </span>","children":null,"spread":false},{"title":"DQN_Lunar_lander_rewards.png <span style='color:#111;'> 48.31KB </span>","children":null,"spread":false},{"title":"TD3_Pendulum_rewards.png <span style='color:#111;'> 61.74KB </span>","children":null,"spread":false},{"title":"DDPG_Pendulum-v0_rewards.png <span style='color:#111;'> 42.82KB </span>","children":null,"spread":false},{"title":"PPO_Pendulum-v0_rewards.png <span style='color:#111;'> 57.13KB </span>","children":null,"spread":false}],"spread":true},{"title":"vanilla_policy_gradient.py <span style='color:#111;'> 8.05KB </span>","children":null,"spread":false},{"title":"DQN.py <span style='color:#111;'> 19.12KB </span>","children":null,"spread":false}],"spread":false}],"spread":true}]