phi3/src_train/train.py [75:101]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Resolve experiment-tracking settings up front so the config reads cleanly.
    reporting_backend = "wandb" if use_wandb else "none"
    tracking_run_name = args.wandb_run_name if use_wandb else None

    # Trainer configuration for supervised fine-tuning (TRL SFTConfig,
    # a superset of HF TrainingArguments). Keyword order is cosmetic.
    train_conf = SFTConfig(
        # -- optimization schedule --
        learning_rate=args.learning_rate,
        lr_scheduler_type=args.lr_scheduler_type,
        warmup_ratio=args.warmup_ratio,
        num_train_epochs=args.epochs,
        max_steps=-1,  # epoch-driven; no hard step cap
        seed=args.seed,
        # -- precision / memory --
        bf16=True,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        gradient_accumulation_steps=args.grad_accum_steps,
        # -- batching / data handling --
        per_device_train_batch_size=args.train_batch_size,
        per_device_eval_batch_size=args.eval_batch_size,
        max_seq_length=args.max_seq_length,
        packing=True,  # concatenate short samples up to max_seq_length
        remove_unused_columns=True,
        # -- evaluation / logging --
        do_eval=False,
        log_level="info",
        logging_strategy="steps",
        logging_steps=args.logging_steps,
        # -- checkpointing / output --
        output_dir=args.output_dir,
        overwrite_output_dir=True,
        save_steps=args.save_steps,
        save_total_limit=1,  # keep only the most recent checkpoint
        # -- experiment tracking --
        report_to=reporting_backend,
        run_name=tracking_run_name,
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



phi3/src_train/train_mlflow.py [89:115]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Decide tracking backend and run name once, outside the constructor call.
    report_target = "wandb" if use_wandb else "none"
    run_label = args.wandb_run_name if use_wandb else None

    # SFT trainer configuration (TRL SFTConfig extends HF TrainingArguments);
    # keyword-argument order carries no semantic meaning.
    train_conf = SFTConfig(
        # Optimization schedule.
        learning_rate=args.learning_rate,
        lr_scheduler_type=args.lr_scheduler_type,
        warmup_ratio=args.warmup_ratio,
        num_train_epochs=args.epochs,
        max_steps=-1,  # train by epochs, not by a fixed step budget
        seed=args.seed,
        # Precision and memory savings.
        bf16=True,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        gradient_accumulation_steps=args.grad_accum_steps,
        # Batching and dataset handling.
        per_device_train_batch_size=args.train_batch_size,
        per_device_eval_batch_size=args.eval_batch_size,
        max_seq_length=args.max_seq_length,
        packing=True,  # pack multiple short examples per sequence
        remove_unused_columns=True,
        # Evaluation and logging.
        do_eval=False,
        log_level="info",
        logging_strategy="steps",
        logging_steps=args.logging_steps,
        # Checkpointing and output location.
        output_dir=args.output_dir,
        overwrite_output_dir=True,
        save_steps=args.save_steps,
        save_total_limit=1,  # retain only the latest checkpoint
        # Experiment tracking.
        report_to=report_target,
        run_name=run_label,
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



