[{"title":"( 87 个子文件 932KB ) 强化学习算法:一些流行的RL算法的实现","children":[{"title":"Algorithms-for-Reinforcement-Learning-master","children":[{"title":".gitignore <span style='color:#111;'> 13B </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 92B </span>","children":null,"spread":false},{"title":".pre-commit-config.yaml <span style='color:#111;'> 260B </span>","children":null,"spread":false},{"title":"references","children":[{"title":"README.md <span style='color:#111;'> 936B </span>","children":null,"spread":false}],"spread":true},{"title":"src","children":[{"title":"RL","children":[{"title":"approximation","children":[{"title":"eligibility_traces","children":[{"title":"README.md <span style='color:#111;'> 2.16KB </span>","children":null,"spread":false},{"title":"sarsa_lambda.py <span style='color:#111;'> 284B </span>","children":null,"spread":false},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false}],"spread":true},{"title":"value_based_method","children":[{"title":"README.md <span style='color:#111;'> 2.07KB </span>","children":null,"spread":false},{"title":"dqn.py <span style='color:#111;'> 11.73KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"__init__.cpython-36.pyc <span style='color:#111;'> 195B </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 179B </span>","children":null,"spread":false},{"title":"ddqn.cpython-37.pyc <span style='color:#111;'> 1.56KB </span>","children":null,"spread":false},{"title":"dqn.cpython-37.pyc <span style='color:#111;'> 8.60KB </span>","children":null,"spread":false},{"title":"dqn.cpython-36.pyc <span style='color:#111;'> 8.77KB </span>","children":null,"spread":false},{"title":"ddqn.cpython-36.pyc <span style='color:#111;'> 1.58KB </span>","children":null,"spread":false}],"spread":true},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"ddqn.py <span style='color:#111;'> 1.60KB </span>","children":null,"spread":false}],"spread":true},{"title":"__pycache__","children":[{"title":"__init__.cpython-36.pyc <span style='color:#111;'> 176B </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 160B </span>","children":null,"spread":false}],"spread":true},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"policy_gradient","children":[{"title":"README.md <span style='color:#111;'> 1.20KB </span>","children":null,"spread":false},{"title":"reinforce.py <span style='color:#111;'> 7.88KB </span>","children":null,"spread":false},{"title":"a2c.py <span style='color:#111;'> 8.53KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"a2c.cpython-37.pyc <span style='color:#111;'> 6.56KB </span>","children":null,"spread":false},{"title":"__init__.cpython-36.pyc <span style='color:#111;'> 192B </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 176B </span>","children":null,"spread":false},{"title":"a2c.cpython-36.pyc <span style='color:#111;'> 6.66KB </span>","children":null,"spread":false}],"spread":false},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"tabular","children":[{"title":"README.md <span style='color:#111;'> 1.04KB </span>","children":null,"spread":false},{"title":"TD.py <span style='color:#111;'> 3.95KB </span>","children":null,"spread":false},{"title":"MC.py <span style='color:#111;'> 1.49KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 7.88KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 154B </span>","children":null,"spread":false},{"title":"utils.cpython-37.pyc <span style='color:#111;'> 7.64KB </span>","children":null,"spread":false}],"spread":true},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"nstep.py <span style='color:#111;'> 4.42KB </span>","children":null,"spread":false}],"spread":true},{"title":"utils.py <span style='color:#111;'> 118B </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"__init__.cpython-36.pyc <span style='color:#111;'> 162B </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 146B </span>","children":null,"spread":false},{"title":"utils.cpython-37.pyc <span style='color:#111;'> 352B </span>","children":null,"spread":false},{"title":"utils.cpython-36.pyc <span style='color:#111;'> 368B </span>","children":null,"spread":false}],"spread":true},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"envs","children":[{"title":"gridworld.py <span style='color:#111;'> 11.42KB </span>","children":null,"spread":false},{"title":"frozen_lake.py <span style='color:#111;'> 3.19KB </span>","children":null,"spread":false},{"title":"discretization.py <span style='color:#111;'> 3.72KB </span>","children":null,"spread":false},{"title":"test_env.py <span style='color:#111;'> 1.91KB </span>","children":null,"spread":false},{"title":"cliffwalk.py <span style='color:#111;'> 883B </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"test_env.cpython-37.pyc <span style='color:#111;'> 2.24KB </span>","children":null,"spread":false},{"title":"gridworld.cpython-37.pyc <span style='color:#111;'> 8.78KB </span>","children":null,"spread":false},{"title":"__init__.cpython-37.pyc <span style='color:#111;'> 151B </span>","children":null,"spread":false},{"title":"finite_env.cpython-37.pyc <span style='color:#111;'> 3.08KB </span>","children":null,"spread":false},{"title":"cliffwalk.cpython-37.pyc <span style='color:#111;'> 922B </span>","children":null,"spread":false}],"spread":false},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"finite_env.py <span style='color:#111;'> 2.59KB </span>","children":null,"spread":false},{"title":".DS_Store <span style='color:#111;'> 6.00KB </span>","children":null,"spread":false},{"title":"coldstart.py <span style='color:#111;'> 3.35KB </span>","children":null,"spread":false}],"spread":true}],"spread":true}],"spread":true},{"title":"figures","children":[{"title":"eligibility_trace_td_lambda.png <span style='color:#111;'> 3.93KB </span>","children":null,"spread":false},{"title":"reinforce_baseline_pseudo_code.png <span style='color:#111;'> 63.72KB </span>","children":null,"spread":false},{"title":"td_lambda_update.png <span style='color:#111;'> 1.94KB </span>","children":null,"spread":false},{"title":"true_online_sarsa_pseudo_code.png <span style='color:#111;'> 75.20KB </span>","children":null,"spread":false},{"title":"reinforce_pseudo_code.png <span style='color:#111;'> 49.24KB </span>","children":null,"spread":false},{"title":"generalized_pg_theorem.png <span style='color:#111;'> 5.77KB </span>","children":null,"spread":false},{"title":"policy_gradient_theorem.png <span style='color:#111;'> 5.10KB </span>","children":null,"spread":false},{"title":"reinforce_update.png <span style='color:#111;'> 4.82KB </span>","children":null,"spread":false},{"title":"td_error.png <span style='color:#111;'> 3.55KB </span>","children":null,"spread":false},{"title":"dqn_pseudo_code.png <span style='color:#111;'> 71.07KB </span>","children":null,"spread":false},{"title":"ddqn_update.png <span style='color:#111;'> 5.74KB </span>","children":null,"spread":false},{"title":"monte_carlo_es_pseudo_code.png <span style='color:#111;'> 72.58KB </span>","children":null,"spread":false},{"title":"actor_critic_pseudo_code.png <span style='color:#111;'> 67.17KB </span>","children":null,"spread":false},{"title":"sarsa_pseudo_code.png <span style='color:#111;'> 55.75KB </span>","children":null,"spread":false},{"title":"update_dqn.png <span style='color:#111;'> 8.48KB </span>","children":null,"spread":false},{"title":"td_lambda_pseudo_code.png <span style='color:#111;'> 59.87KB </span>","children":null,"spread":false},{"title":"sarsa_lambda_pseudo_code.png <span style='color:#111;'> 87.30KB </span>","children":null,"spread":false},{"title":"qlearning_pseudo_code.png <span style='color:#111;'> 49.70KB </span>","children":null,"spread":false},{"title":"true_online_td_lambda_pseudo_code.png <span style='color:#111;'> 74.52KB </span>","children":null,"spread":false}],"spread":false},{"title":"requirements.txt <span style='color:#111;'> 37B </span>","children":null,"spread":false},{"title":".idea","children":[{"title":".gitignore <span style='color:#111;'> 47B </span>","children":null,"spread":false},{"title":"vcs.xml <span style='color:#111;'> 181B </span>","children":null,"spread":false},{"title":"misc.xml <span style='color:#111;'> 195B </span>","children":null,"spread":false},{"title":"modules.xml <span style='color:#111;'> 257B </span>","children":null,"spread":false},{"title":"RL.iml <span style='color:#111;'> 571B </span>","children":null,"spread":false},{"title":"inspectionProfiles","children":[{"title":"profiles_settings.xml <span style='color:#111;'> 175B </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"notebooks","children":[{"title":"REINFORCE.ipynb <span style='color:#111;'> 199.52KB </span>","children":null,"spread":false},{"title":"Temporal_Difference_Learning.ipynb <span style='color:#111;'> 10.25KB </span>","children":null,"spread":false}],"spread":true},{"title":"setup.py <span style='color:#111;'> 696B </span>","children":null,"spread":false},{"title":"data","children":[{"title":"frozen_lake_deterministic_transition.npy <span style='color:#111;'> 8.13KB </span>","children":null,"spread":false},{"title":"frozen_lake_stochastic_transition.npy <span style='color:#111;'> 8.13KB </span>","children":null,"spread":false}],"spread":true}],"spread":false}],"spread":true}]