in src/open-r1-multimodal/src/open_r1/utils/callbacks.py [0:0]
def on_save(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
    """Push the checkpoint for the current step to a step-specific Hub revision.

    Only the world-process-zero rank does any work. When Slurm is available,
    benchmark jobs are scheduled once the upload has completed successfully.

    Args:
        args: Training arguments; ``hub_model_id``, ``hub_model_revision``,
            ``output_dir``, ``system_prompt`` and (only when Slurm is
            available) ``benchmarks`` are read from it.
        state: Trainer state; ``is_world_process_zero`` and ``global_step``
            are read from it.
        control: Unused by this callback.
        **kwargs: Unused by this callback.
    """
    if not state.is_world_process_zero:
        return

    global_step = state.global_step

    # WARNING: using dataclasses.replace(args, ...) breaks the accelerator
    # dist state, and so does instantiating a new SFTConfig. A lightweight
    # DummyConfig carrying only the needed fields is the workaround.
    dummy_config = DummyConfig(
        hub_model_id=args.hub_model_id,
        hub_model_revision=f"{args.hub_model_revision}-step-{global_step:09d}",
        output_dir=f"{args.output_dir}/checkpoint-{global_step}",
        system_prompt=args.system_prompt,
    )

    # "*.pt" is ignored so the optimizer states are not pushed.
    future = push_to_hub_revision(dummy_config, extra_ignore_patterns=["*.pt"])

    if not is_slurm_available():
        return

    dummy_config.benchmarks = args.benchmarks

    def run_benchmark_callback(done_future):
        # Done-callbacks also fire when the future failed or was cancelled,
        # so the outcome must be checked before announcing success and
        # benchmarking a revision that may not exist.
        # NOTE(review): assumes a concurrent.futures.Future-compatible object
        # (cancelled() / exception()) -- confirm against push_to_hub_revision.
        if done_future.cancelled():
            print(f"Checkpoint {global_step} push to hub was cancelled; skipping benchmarks.")
            return
        error = done_future.exception()
        if error is not None:
            print(f"Checkpoint {global_step} failed to push to hub: {error!r}; skipping benchmarks.")
            return
        print(f"Checkpoint {global_step} pushed to hub.")
        run_benchmark_jobs(dummy_config, self.model_config)

    future.add_done_callback(run_benchmark_callback)