recipes/zephyr-141b-A35b/orpo/config_full.yaml
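# Typical launch (an assumption based on the alignment-handbook layout; adjust the
# script path and accelerate config to your setup):
#   ACCELERATE_LOG_LEVEL=info accelerate launch \
#     --config_file recipes/accelerate_configs/fsdp.yaml \
#     scripts/run_orpo.py recipes/zephyr-141b-A35b/orpo/config_full.yaml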
# Model arguments
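# Base model: community mirror of Mixtral-8x22B (~141B total parameters, ~35B active
# per token, hence the zephyr-141b-A35b name).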
model_name_or_path: mistral-community/Mixtral-8x22B-v0.1
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
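# Zephyr-style chat template: each turn is wrapped in a <|user|>, <|system|>, or
# <|assistant|> header and terminated with the tokenizer's eos_token; when
# add_generation_prompt is set, a trailing <|assistant|> header is appended so the
# model continues with the assistant reply.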
chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
dataset_mixer:
  argilla/distilabel-capybara-dpo-7k-binarized: 1.0
dataset_splits:
- train
preprocessing_num_workers: 8
# ORPOTrainer arguments
bf16: true
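# beta weights the odds-ratio preference term relative to the SFT (NLL) loss; in
# TRL's ORPOConfig this corresponds to the lambda hyperparameter of the ORPO paper.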
beta: 0.05
gradient_accumulation_steps: 1
gradient_checkpointing: true
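# use_reentrant is forwarded to torch.utils.checkpoint; true selects the legacy
# reentrant checkpointing implementation.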
gradient_checkpointing_kwargs:
  use_reentrant: true
hub_model_id: zephyr-orpo-141b-A35b
learning_rate: 5.0e-6
log_level: info
logging_steps: 10
lr_scheduler_type: inverse_sqrt
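# max_length caps the tokenized prompt + completion pair and max_prompt_length caps
# the prompt portion, leaving roughly 2048 - 1792 = 256 tokens of headroom for the
# chosen/rejected completions when prompts hit the cap.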
max_length: 2048
max_prompt_length: 1792
num_train_epochs: 3
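# adamw_bnb_8bit: bitsandbytes 8-bit AdamW, which keeps optimizer state in 8-bit to
# reduce optimizer memory at this model scale.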
optim: adamw_bnb_8bit
output_dir: data/zephyr-orpo-141b-A35b
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- tensorboard
- wandb
save_strategy: "no"
seed: 42
warmup_steps: 100