[{"title":"( 96 个子文件 4.39MB ) reinforcement-learning:单智能体和多智能体强化学习算法的实现。 的MATLAB-源码","children":[{"title":"reinforcement-learning-master","children":[{"title":".gitattributes <span style='color:#111;'> 66B </span>","children":null,"spread":false},{"title":"graduate_thesis.pdf <span style='color:#111;'> 2.62MB </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 759B </span>","children":null,"spread":false},{"title":"SAL","children":[{"title":"03 TD","children":[{"title":"qLearning.m <span style='color:#111;'> 2.91KB </span>","children":null,"spread":false},{"title":"sarsa_iterationCount.mat <span style='color:#111;'> 89.83KB </span>","children":null,"spread":false},{"title":"qLearning_iterationCount.mat <span style='color:#111;'> 88.46KB </span>","children":null,"spread":false},{"title":"sarsa.m <span style='color:#111;'> 3.25KB </span>","children":null,"spread":false},{"title":"sarsa_q.mat <span style='color:#111;'> 26.90KB </span>","children":null,"spread":false},{"title":"qLearning_reward.mat <span style='color:#111;'> 5.16KB </span>","children":null,"spread":false},{"title":"sarsa_reward.mat <span style='color:#111;'> 1.52KB </span>","children":null,"spread":false},{"title":"qLearning_q.mat <span style='color:#111;'> 26.86KB </span>","children":null,"spread":false}],"spread":true},{"title":"01 DP","children":[{"title":"VI_simulationTime.mat <span style='color:#111;'> 192B </span>","children":null,"spread":false},{"title":"VI_Q.svg <span style='color:#111;'> 55.47KB </span>","children":null,"spread":false},{"title":"VI_Q.mat <span style='color:#111;'> 24.90KB </span>","children":null,"spread":false},{"title":"VI_P.mat <span style='color:#111;'> 361B </span>","children":null,"spread":false},{"title":"PI_V.mat <span style='color:#111;'> 6.76KB </span>","children":null,"spread":false},{"title":"VI_V.mat <span style='color:#111;'> 4.77KB </span>","children":null,"spread":false},{"title":"PI_P.mat <span style='color:#111;'> 696B </span>","children":null,"spread":false},{"title":"policy_evaluation.m <span style='color:#111;'> 781B </span>","children":null,"spread":false},{"title":"PI.m <span style='color:#111;'> 2.33KB </span>","children":null,"spread":false},{"title":"VI.m <span style='color:#111;'> 2.40KB </span>","children":null,"spread":false},{"title":"PI_simulationTime.mat <span style='color:#111;'> 192B </span>","children":null,"spread":false},{"title":"PE.m <span style='color:#111;'> 522B </span>","children":null,"spread":false},{"title":"PI_P.svg <span style='color:#111;'> 54.97KB </span>","children":null,"spread":false},{"title":"PE_V.mat <span style='color:#111;'> 4.92KB </span>","children":null,"spread":false},{"title":"VI_P.svg <span style='color:#111;'> 53.07KB </span>","children":null,"spread":false},{"title":"Values.xlsx <span style='color:#111;'> 25.94KB </span>","children":null,"spread":false}],"spread":false},{"title":"05 DQN","children":[{"title":"dqn_rwd.png <span style='color:#111;'> 9.68KB </span>","children":null,"spread":false},{"title":"DQNEstimator.m <span style='color:#111;'> 3.01KB </span>","children":null,"spread":false},{"title":"DQN_iterationCount.mat <span style='color:#111;'> 23.08KB </span>","children":null,"spread":false},{"title":"DQN_reward.mat <span style='color:#111;'> 2.91KB </span>","children":null,"spread":false},{"title":"DQN.m <span style='color:#111;'> 4.06KB </span>","children":null,"spread":false},{"title":"DQN_weights.mat <span style='color:#111;'> 824.80KB </span>","children":null,"spread":false},{"title":"DQN_simulationTime.mat <span style='color:#111;'> 241B </span>","children":null,"spread":false}],"spread":true},{"title":"06 LPG","children":[{"title":"ValueEstimator.m <span style='color:#111;'> 844B </span>","children":null,"spread":false},{"title":"policy_gradient.m <span style='color:#111;'> 3.29KB </span>","children":null,"spread":false},{"title":"value_weights.mat <span style='color:#111;'> 211B </span>","children":null,"spread":false},{"title":"PolicyEstimator.m <span style='color:#111;'> 1.11KB </span>","children":null,"spread":false},{"title":"policy_weights.mat <span style='color:#111;'> 330B </span>","children":null,"spread":false},{"title":"pg_iterationCount.mat <span style='color:#111;'> 100.89KB </span>","children":null,"spread":false},{"title":"pg_reward.mat <span style='color:#111;'> 2.08KB </span>","children":null,"spread":false}],"spread":true},{"title":"Basic Functions","children":[{"title":"sigmoid.m <span style='color:#111;'> 51B </span>","children":null,"spread":false},{"title":"ds2nfu.m <span style='color:#111;'> 2.88KB </span>","children":null,"spread":false},{"title":"make_epsilon_policy.m <span style='color:#111;'> 334B </span>","children":null,"spread":false},{"title":"make_random_policy.m <span style='color:#111;'> 92B </span>","children":null,"spread":false},{"title":"make_greedy_policy.m <span style='color:#111;'> 277B </span>","children":null,"spread":false},{"title":"q_value_or_policy2fig.m <span style='color:#111;'> 3.14KB </span>","children":null,"spread":false}],"spread":true},{"title":"02 MC","children":[{"title":"offpmc_q.mat <span style='color:#111;'> 19.62KB </span>","children":null,"spread":false},{"title":"offpmc_policy.mat <span style='color:#111;'> 1.29KB </span>","children":null,"spread":false},{"title":"onpmc_reward.mat <span style='color:#111;'> 1015B </span>","children":null,"spread":false},{"title":"onpmc_q.mat <span style='color:#111;'> 27.04KB </span>","children":null,"spread":false},{"title":"offpmc_iterationCount.mat <span style='color:#111;'> 106.64KB </span>","children":null,"spread":false},{"title":"onpmc_policy.mat <span style='color:#111;'> 2.43KB </span>","children":null,"spread":false},{"title":"onpmc_returns.mat <span style='color:#111;'> 35.22KB </span>","children":null,"spread":false},{"title":"onpmc.m <span style='color:#111;'> 4.16KB </span>","children":null,"spread":false},{"title":"offpmc.m <span style='color:#111;'> 4.54KB </span>","children":null,"spread":false},{"title":"offpmc_c.mat <span style='color:#111;'> 19.86KB </span>","children":null,"spread":false},{"title":"onpmc_iterationCount.mat <span style='color:#111;'> 77.11KB </span>","children":null,"spread":false},{"title":"offpmc_reward.mat <span style='color:#111;'> 1.84KB </span>","children":null,"spread":false}],"spread":false},{"title":"04 LFA","children":[{"title":"onp_lfa_iterationCount.mat <span style='color:#111;'> 122.68KB </span>","children":null,"spread":false},{"title":"linear_function_approximation.m <span style='color:#111;'> 3.37KB </span>","children":null,"spread":false},{"title":"onp_lfa_weights.mat <span style='color:#111;'> 328B </span>","children":null,"spread":false},{"title":"onp_lfa_reward.mat <span style='color:#111;'> 1.66KB </span>","children":null,"spread":false},{"title":"LFAEstimator.m <span style='color:#111;'> 807B </span>","children":null,"spread":false}],"spread":true},{"title":"Environment","children":[{"title":"SAEnvironment.m <span style='color:#111;'> 7.34KB </span>","children":null,"spread":false},{"title":"clcAngle.m <span style='color:#111;'> 463B </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"MAL","children":[{"title":"03 MAPG","children":[{"title":"mapg_iterationCount.mat <span style='color:#111;'> 3.47KB </span>","children":null,"spread":false},{"title":"ValueEstimator.m <span style='color:#111;'> 844B </span>","children":null,"spread":false},{"title":"mapg_reward.mat <span style='color:#111;'> 3.52KB </span>","children":null,"spread":false},{"title":"value_weights.mat <span style='color:#111;'> 216B </span>","children":null,"spread":false},{"title":"agent2_policy_weights.mat <span style='color:#111;'> 537B </span>","children":null,"spread":false},{"title":"agent1_policy_weights.mat <span style='color:#111;'> 536B </span>","children":null,"spread":false},{"title":"PolicyEstimator.m <span style='color:#111;'> 1.10KB </span>","children":null,"spread":false},{"title":"mapg.m <span style='color:#111;'> 3.31KB </span>","children":null,"spread":false},{"title":"MAEnvironment.m <span style='color:#111;'> 7.59KB </span>","children":null,"spread":false}],"spread":true},{"title":"Basic Functions","children":[{"title":"sigmoid.m <span style='color:#111;'> 51B </span>","children":null,"spread":false},{"title":"ds2nfu.m <span style='color:#111;'> 2.88KB </span>","children":null,"spread":false},{"title":"make_epsilon_policy.m <span style='color:#111;'> 334B </span>","children":null,"spread":false},{"title":"make_random_policy.m <span style='color:#111;'> 92B </span>","children":null,"spread":false},{"title":"make_greedy_policy.m <span style='color:#111;'> 277B </span>","children":null,"spread":false},{"title":"clcAngle.m <span style='color:#111;'> 463B </span>","children":null,"spread":false},{"title":"compare_fig.m <span style='color:#111;'> 1.72KB </span>","children":null,"spread":false},{"title":"q_value_or_policy2fig.m <span style='color:#111;'> 3.14KB </span>","children":null,"spread":false}],"spread":true},{"title":"01 MA Centralized-Q","children":[{"title":"maq_reward.mat <span style='color:#111;'> 5.85KB </span>","children":null,"spread":false},{"title":"macq.m <span style='color:#111;'> 3.04KB </span>","children":null,"spread":false},{"title":"maq_iterationCount.mat <span style='color:#111;'> 5.85KB </span>","children":null,"spread":false},{"title":"weights.mat <span style='color:#111;'> 1.45KB </span>","children":null,"spread":false},{"title":"LFAEstimator.m <span style='color:#111;'> 807B </span>","children":null,"spread":false},{"title":"MAEnvironment.m <span style='color:#111;'> 7.42KB </span>","children":null,"spread":false}],"spread":true},{"title":"02 MA Hysteretic-Q","children":[{"title":"mahq.m <span style='color:#111;'> 3.63KB </span>","children":null,"spread":false},{"title":"a2_weights.mat <span style='color:#111;'> 535B </span>","children":null,"spread":false},{"title":"a1_weights.mat <span style='color:#111;'> 536B </span>","children":null,"spread":false},{"title":"maq_reward.mat <span style='color:#111;'> 5.63KB </span>","children":null,"spread":false},{"title":"maq_iterationCount.mat <span style='color:#111;'> 5.60KB </span>","children":null,"spread":false},{"title":"LFAEstimator.m <span style='color:#111;'> 1003B </span>","children":null,"spread":false},{"title":"MAEnvironment.m <span style='color:#111;'> 7.59KB </span>","children":null,"spread":false}],"spread":true}],"spread":true}],"spread":true}],"spread":true}]