sat/configs/tora/train_dense.yaml (63 lines of code) (raw):

# Fine-tuning configuration with three top-level sections:
#   args      - run-level settings (checkpoint, iteration counts, data paths)
#   data      - dataset class and its constructor parameters
#   deepspeed - settings handed to the DeepSpeed engine

args:
  checkpoint_activations: true
  model_parallel_size: 1
  mode: finetune
  load: "ckpts/CogVideoX-5b-sat"
  no_load_rng: true
  train_iters: 15000
  eval_iters: 1
  # eval_interval is larger than train_iters; presumably this keeps periodic
  # evaluation from ever triggering during the run - confirm with the trainer.
  eval_interval: 99999
  eval_batch_size: 1
  save: outputs
  save_interval: 2000
  log_interval: 1
  # Training and validation both point at the same example directory.
  train_data: ["sat/training_examples"]
  valid_data: ["sat/training_examples"]
  # Quoted so the comma-separated ratio stays a string under every resolver.
  split: "1,0,0"
  num_workers: 8
  force_train: true
  only_log_video_latents: true
  vis_traj_features: false
  sample_flow: false
  seed: 1234

data:
  target: data_video.SFTDataset
  params:
    video_size: [480, 720]
    fps: 8
    max_num_frames: 49
    skip_frms_num: 3.0

deepspeed:
  train_micro_batch_size_per_gpu: 1
  gradient_accumulation_steps: 1
  steps_per_print: 50
  gradient_clipping: 0.1
  zero_optimization:
    stage: 2
    # NOTE(review): recent DeepSpeed releases document `offload_optimizer`
    # in place of `cpu_offload` - confirm against the pinned version.
    cpu_offload: false
    contiguous_gradients: false
    overlap_comm: true
    reduce_scatter: true
    reduce_bucket_size: 1000000000
    allgather_bucket_size: 1000000000
    load_from_fp32_weights: false
  zero_allow_untested_optimizer: true
  # bf16 is the active precision mode; fp16 is switched off.
  bf16:
    enabled: true
  fp16:
    enabled: false
    # Loss-scaling options are part of the fp16 section in the DeepSpeed
    # config schema; they have no effect while fp16 is disabled.
    loss_scale: 0
    loss_scale_window: 400
    hysteresis: 2
    min_loss_scale: 1
  optimizer:
    type: sat.ops.FusedEmaAdam
    params:
      lr: 0.0001
      betas: [0.9, 0.95]
      # Exponents are written with an explicit mantissa dot so YAML 1.1
      # loaders (e.g. PyYAML) resolve them as floats rather than strings.
      eps: 1.0e-8
      weight_decay: 1.0e-4
  activation_checkpointing:
    partition_activations: false
    contiguous_memory_optimization: false
  wall_clock_breakdown: false