Path Lines of Code MANIFEST.in 6 pyproject.toml 25 scripts/add_copyrights.py 41 scripts/generate_tiny_models.py 199 scripts/generate_toolcall_dataset.py 113 scripts/generate_zen_dataset.py 614 scripts/log_example_reports.py 120 scripts/log_reports.py 122 setup.cfg 87 setup.py 2 trl/__init__.py 199 trl/accelerate_configs/fsdp1.yaml 28 trl/accelerate_configs/fsdp2.yaml 24 trl/accelerate_configs/multi_gpu.yaml 16 trl/accelerate_configs/single_gpu.yaml 16 trl/accelerate_configs/zero1.yaml 20 trl/accelerate_configs/zero2.yaml 21 trl/accelerate_configs/zero3.yaml 22 trl/cli.py 72 trl/core.py 105 trl/data_utils.py 325 trl/environment/__init__.py 10 trl/environment/base_environment.py 299 trl/extras/__init__.py 10 trl/extras/best_of_n_sampler.py 67 trl/extras/dataset_formatting.py 60 trl/extras/profiling.py 27 trl/extras/vllm_client.py 148 trl/import_utils.py 91 trl/mergekit_utils.py 180 trl/models/__init__.py 54 trl/models/activation_offloading.py 236 trl/models/auxiliary_modules.py 64 trl/models/modeling_base.py 481 trl/models/modeling_sd_base.py 490 trl/models/modeling_value_head.py 237 trl/models/sd_utils.py 74 trl/models/utils.py 208 trl/rewards/__init__.py 10 trl/rewards/format_rewards.py 7 trl/scripts/__init__.py 10 trl/scripts/dpo.py 82 trl/scripts/env.py 57 trl/scripts/grpo.py 82 trl/scripts/kto.py 51 trl/scripts/sft.py 63 trl/scripts/utils.py 138 trl/scripts/vllm_serve.py 316 trl/trainer/__init__.py 145 trl/trainer/alignprop_config.py 96 trl/trainer/alignprop_trainer.py 291 trl/trainer/bco_config.py 128 trl/trainer/bco_trainer.py 1073 trl/trainer/callbacks.py 363 trl/trainer/cpo_config.py 111 trl/trainer/cpo_trainer.py 741 trl/trainer/ddpo_config.py 187 trl/trainer/ddpo_trainer.py 433 trl/trainer/dpo_config.py 264 trl/trainer/dpo_trainer.py 1257 trl/trainer/gkd_config.py 61 trl/trainer/gkd_trainer.py 239 trl/trainer/grpo_config.py 367 trl/trainer/grpo_trainer.py 971 trl/trainer/iterative_sft_config.py 48 trl/trainer/iterative_sft_trainer.py 330 trl/trainer/judges.py 201 trl/trainer/kto_config.py 147 trl/trainer/kto_trainer.py 1235 trl/trainer/model_config.py 100 trl/trainer/nash_md_config.py 17 trl/trainer/nash_md_trainer.py 360 trl/trainer/online_dpo_config.py 111 trl/trainer/online_dpo_trainer.py 544 trl/trainer/orpo_config.py 97 trl/trainer/orpo_trainer.py 730 trl/trainer/ppo_config.py 72 trl/trainer/ppo_trainer.py 666 trl/trainer/prm_config.py 66 trl/trainer/prm_trainer.py 217 trl/trainer/reward_config.py 60 trl/trainer/reward_trainer.py 293 trl/trainer/rloo_config.py 58 trl/trainer/rloo_trainer.py 549 trl/trainer/sft_config.py 159 trl/trainer/sft_trainer.py 590 trl/trainer/utils.py 1120 trl/trainer/xpo_config.py 16 trl/trainer/xpo_trainer.py 404