Path Lines of Code CODE_OF_CONDUCT.md 97 CONTRIBUTING.md 527 README.md 143 examples/README.md 2 examples/accelerate_configs/deepspeed_zero1.yaml 20 examples/accelerate_configs/deepspeed_zero2.yaml 21 examples/accelerate_configs/deepspeed_zero3.yaml 22 examples/accelerate_configs/fsdp1.yaml 28 examples/accelerate_configs/fsdp2.yaml 24 examples/accelerate_configs/multi_gpu.yaml 16 examples/accelerate_configs/single_gpu.yaml 16 examples/cli_configs/example_config.yaml 10 examples/datasets/hh-rlhf-helpful-base.py 54 examples/datasets/lm-human-preferences-descriptiveness.py 53 examples/datasets/lm-human-preferences-sentiment.py 50 examples/datasets/math_shepherd.py 82 examples/datasets/prm800k.py 84 examples/datasets/rlaif-v.py 44 examples/datasets/tldr.py 44 examples/datasets/tldr_preference.py 49 examples/datasets/ultrafeedback-prompt.py 44 examples/datasets/ultrafeedback.py 82 examples/notebooks/README.md 5 examples/notebooks/best_of_n.ipynb 662 examples/notebooks/gpt2-sentiment.ipynb 861 examples/research_projects/README.md 5 examples/research_projects/layer_skip/README.md 12 examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py 51 examples/research_projects/layer_skip/scripts/config.py 10 examples/research_projects/layer_skip/scripts/custom_trainer.py 26 examples/research_projects/layer_skip/scripts/layer_skip_sft.py 53 examples/research_projects/stack_llama/scripts/README.md 15 examples/research_projects/stack_llama/scripts/merge_peft_adapter.py 31 examples/research_projects/stack_llama/scripts/reward_modeling.py 255 examples/research_projects/stack_llama/scripts/rl_training.py 179 examples/research_projects/stack_llama/scripts/supervised_finetuning.py 159 examples/research_projects/stack_llama_2/scripts/README.md 57 examples/research_projects/stack_llama_2/scripts/dpo_llama2.py 179 examples/research_projects/stack_llama_2/scripts/requirements.txt 7 examples/research_projects/stack_llama_2/scripts/sft_llama2.py 154 examples/research_projects/toxicity/README.md 5 examples/research_projects/toxicity/scripts/evaluate-toxicity.py 101 examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py 121 examples/scripts/alignprop.py 90 examples/scripts/bco.py 69 examples/scripts/cpo.py 30 examples/scripts/ddpo.py 150 examples/scripts/dpo.py 1 examples/scripts/dpo_online.py 91 examples/scripts/dpo_vlm.py 75 examples/scripts/evals/judge_tldr.py 37 examples/scripts/gkd.py 65 examples/scripts/kto.py 40 examples/scripts/nash_md.py 82 examples/scripts/orpo.py 30 examples/scripts/ppo/ppo.py 96 examples/scripts/ppo/ppo_tldr.py 97 examples/scripts/prm.py 60 examples/scripts/reward_modeling.py 62 examples/scripts/rloo/rloo.py 67 examples/scripts/rloo/rloo_tldr.py 66 examples/scripts/sft.py 1 examples/scripts/sft_gemma3.py 29 examples/scripts/sft_video_llm.py 141 examples/scripts/sft_vlm.py 64 examples/scripts/sft_vlm_gemma3.py 131 examples/scripts/sft_vlm_smol_vlm.py 74 examples/scripts/xpo.py 82 requirements.txt 3 trl/templates/lm_model_card.md 40