Path Lines of Code LICENSE.md 169 README.md 26 notebooks/unit1/requirements-unit1.txt 4 notebooks/unit2/requirements-unit2.txt 10 notebooks/unit4/requirements-unit4.txt 5 notebooks/unit6/requirements-unit6.txt 4 units/en/communication/certification.mdx 17 units/en/communication/conclusion.mdx 13 units/en/live1/live1.mdx 5 units/en/unit0/discord101.mdx 22 units/en/unit0/introduction.mdx 83 units/en/unit0/setup.mdx 18 units/en/unit1/additional-readings.mdx 9 units/en/unit1/conclusion.mdx 10 units/en/unit1/deep-rl.mdx 14 units/en/unit1/exp-exp-tradeoff.mdx 22 units/en/unit1/glossary.mdx 42 units/en/unit1/hands-on.mdx 456 units/en/unit1/introduction.mdx 12 units/en/unit1/quiz.mdx 123 units/en/unit1/rl-framework.mdx 88 units/en/unit1/summary.mdx 11 units/en/unit1/tasks.mdx 16 units/en/unit1/two-methods.mdx 55 units/en/unit1/what-is-rl.mdx 20 units/en/unit2/additional-readings.mdx 9 units/en/unit2/bellman-equation.mdx 35 units/en/unit2/conclusion.mdx 9 units/en/unit2/glossary.mdx 29 units/en/unit2/hands-on.mdx 828 units/en/unit2/introduction.mdx 14 units/en/unit2/mc-vs-td.mdx 69 units/en/unit2/mid-way-quiz.mdx 74 units/en/unit2/mid-way-recap.mdx 10 units/en/unit2/q-learning-example.mdx 44 units/en/unit2/q-learning-recap.mdx 13 units/en/unit2/q-learning.mdx 91 units/en/unit2/quiz2.mdx 69 units/en/unit2/two-types-value-based-methods.mdx 50 units/en/unit2/what-is-rl.mdx 13 units/en/unit3/additional-readings.mdx 7 units/en/unit3/conclusion.mdx 8 units/en/unit3/deep-q-algorithm.mdx 59 units/en/unit3/deep-q-network.mdx 24 units/en/unit3/from-q-to-dqn.mdx 19 units/en/unit3/glossary.mdx 27 units/en/unit3/hands-on.mdx 212 units/en/unit3/introduction.mdx 9 units/en/unit3/quiz.mdx 69 units/en/unit4/additional-readings.mdx 11 units/en/unit4/advantages-disadvantages.mdx 42 units/en/unit4/conclusion.mdx 10 units/en/unit4/glossary.mdx 15 units/en/unit4/hands-on.mdx 763 units/en/unit4/introduction.mdx 14 units/en/unit4/pg-theorem.mdx 38 units/en/unit4/policy-gradient.mdx 71 units/en/unit4/quiz.mdx 66 units/en/unit4/what-are-policy-based-methods.mdx 27 units/en/unit5/bonus.mdx 10 units/en/unit5/conclusion.mdx 12 units/en/unit5/curiosity.mdx 27 units/en/unit5/hands-on.mdx 266 units/en/unit5/how-mlagents-works.mdx 45 units/en/unit5/introduction.mdx 19 units/en/unit5/pyramids.mdx 20 units/en/unit5/quiz.mdx 115 units/en/unit5/snowball-target.mdx 32 units/en/unit6/additional-readings.mdx 10 units/en/unit6/advantage-actor-critic.mdx 39 units/en/unit6/conclusion.mdx 6 units/en/unit6/hands-on.mdx 265 units/en/unit6/introduction.mdx 12 units/en/unit6/quiz.mdx 98 units/en/unit6/variance-problem.mdx 19 units/en/unit7/additional-readings.mdx 13 units/en/unit7/conclusion.mdx 6 units/en/unit7/hands-on.mdx 204 units/en/unit7/introduction-to-marl.mdx 36 units/en/unit7/introduction.mdx 23 units/en/unit7/multi-agent-setting.mdx 38 units/en/unit7/quiz.mdx 119 units/en/unit7/self-play.mdx 77 units/en/unit8/additional-readings.mdx 14 units/en/unit8/clipped-surrogate-objective.mdx 41 units/en/unit8/conclusion-sf.mdx 7 units/en/unit8/conclusion.mdx 5 units/en/unit8/hands-on-cleanrl.mdx 867 units/en/unit8/hands-on-sf.mdx 298 units/en/unit8/introduction-sf.mdx 7 units/en/unit8/introduction.mdx 15 units/en/unit8/intuition-behind-ppo.mdx 11 units/en/unit8/visualize.mdx 43 units/en/unitbonus1/conclusion.mdx 6 units/en/unitbonus1/how-huggy-works.mdx 36 units/en/unitbonus1/introduction.mdx 4 units/en/unitbonus1/play.mdx 11 units/en/unitbonus1/train.mdx 218 units/en/unitbonus2/hands-on.mdx 8 units/en/unitbonus2/introduction.mdx 4 units/en/unitbonus2/optuna.mdx 7 units/en/unitbonus3/curriculum-learning.mdx 34 units/en/unitbonus3/decision-transformers.mdx 17 units/en/unitbonus3/envs-to-try.mdx 52 units/en/unitbonus3/generalisation.mdx 6 units/en/unitbonus3/godotrl.mdx 165 units/en/unitbonus3/introduction.mdx 6 units/en/unitbonus3/language-models.mdx 28 units/en/unitbonus3/learning-agents.mdx 21 units/en/unitbonus3/model-based.mdx 18 units/en/unitbonus3/offline-online.mdx 22 units/en/unitbonus3/rl-documentation.mdx 37 units/en/unitbonus3/rlhf.mdx 35 units/en/unitbonus3/student-works.mdx 37 units/en/unitbonus5/conclusion.mdx 3 units/en/unitbonus5/customize-the-environment.mdx 14 units/en/unitbonus5/getting-started.mdx 199 units/en/unitbonus5/introduction.mdx 14 units/en/unitbonus5/the-environment.mdx 6 units/en/unitbonus5/train-our-robot.mdx 31