def add_sd_args_to

def add_sd_args_to_parser()

in assets/training/finetune_acft_image/src/finetune/finetune.py [0:0]
241 lines of code
2 McCabe index (conditional complexity)

def add_sd_args_to_parser(parser):
    """Add Stable Diffusion related args to parser.

    Registers the data, tokenizer, text encoder, noise scheduler, loss and
    validation options on ``parser`` via ``parser.add_argument``.

    Boolean options are converted with ``str2bool`` (resolved from the
    enclosing module when an argument value is actually parsed), so they take
    an explicit value on the command line, e.g. ``--train_text_encoder true``.

    Args:
        parser: An ``argparse.ArgumentParser``-like object exposing
            ``add_argument``.

    Returns:
        The same ``parser`` instance, with the arguments added.
    """
    # Data inputs
    parser.add_argument(
        "--class_data_dir",
        type=str,
        default="class_data_dir",
        required=False,
        help="A folder containing the training data of class images."
    )

    # Instance prompt
    parser.add_argument(
        "--instance_prompt",
        type=str,
        default=None,
        required=False,
        help="The prompt with identifier specifying the instance"
    )

    parser.add_argument(
        "--resolution",
        type=int,
        default=512,
        required=False,
        help="The image resolution for training."
    )

    parser.add_argument(
        "--sample_batch_size",
        type=int,
        default=4,
        required=False,
        help="Batch size (per device) for sampling class images for prior preservation."
    )

    # Tokenizer
    parser.add_argument(
        "--tokenizer_name",
        type=str,
        default="openai/clip-vit-large-patch14",
        # The trailing comma matters: without it `choices` is a plain string and
        # argparse would accept any substring of it (e.g. "clip").
        choices=("openai/clip-vit-large-patch14",),
        help="Pretrained tokenizer name or path if not the same as model_name"
    )
    parser.add_argument(
        "--tokenizer_max_length",
        type=int,
        default=None,
        required=False,
        help="The maximum length of the tokenizer. If not set, will default to the tokenizer's max length."
    )

    # Text Encoder:
    parser.add_argument(
        "--text_encoder_type",
        type=str,
        default="CLIPTextModel",
        choices=("CLIPTextModel", "T5EncoderModel"),
        help="Text Encoder or path if not the same as model_name"
    )
    parser.add_argument(
        "--text_encoder_name",
        type=str,
        required=False,
        help="Text Encoder or path if not the same as model_name",
    )
    parser.add_argument(
        "--train_text_encoder",
        type=lambda x: bool(str2bool(str(x), "train_text_encoder")),
        default=False,
        help="Whether to train the text encoder. If set, the text encoder should be float32 precision."
    )
    parser.add_argument(
        "--pre_compute_text_embeddings",
        type=lambda x: bool(str2bool(str(x), "pre_compute_text_embeddings")),
        default=True,
        help=(
            "Whether or not to pre-compute text embeddings. If text embeddings are pre-computed, "
            "the text encoder will not be kept in memory during training and will leave more GPU memory "
            "available for training the rest of the model. This is not compatible with `--train_text_encoder`."
        )
    )
    parser.add_argument(
        "--text_encoder_use_attention_mask",
        type=lambda x: bool(str2bool(str(x), "text_encoder_use_attention_mask")),
        default=False,
        required=False,
        help="Whether to use attention mask for the text encoder"
    )
    parser.add_argument(
        "--skip_save_text_encoder",
        type=lambda x: bool(str2bool(str(x), "skip_save_text_encoder")),
        default=False,
        required=False,
        help="Set to not save text encoder"
    )

    # Residual noise prediction using UNET - decide whether to use timesteps as labels or None
    parser.add_argument(
        "--class_labels_conditioning",
        type=str,
        required=False,
        default=None,
        # `None` is only reachable through the default; a command-line value is
        # always a string and must therefore be "timesteps".
        choices=("timesteps", None),
        help="The optional `class_label` conditioning to pass to the unet, available values are `timesteps`."
    )

    # Noise Scheduler
    parser.add_argument(
        "--noise_scheduler_name",
        type=str,
        required=False,
        choices=("DPMSolverMultistepScheduler", "DDPMScheduler", "PNDMScheduler"),
        help="The noise scheduler name to use for the diffusion process."
    )
    parser.add_argument(
        "--noise_scheduler_num_train_timesteps",
        type=int,
        required=False,
        help="The number of diffusion steps to train the model."
    )
    parser.add_argument(
        "--noise_scheduler_variance_type",
        type=str,
        choices=("fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"),
        required=False,
        help="Clip the variance when adding noise to the denoised sample."
    )
    parser.add_argument(
        "--noise_scheduler_prediction_type",
        type=str,
        choices=("epsilon", "sample", "v_prediction"),
        required=False,
        help=(
            "Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion "
            "process), `sample` (directly predicts the noisy sample) or `v_prediction` "
            "(see section 2.4 of [Imagen Video](https://imagen.research.google/video/paper.pdf) paper)."
        )
    )
    parser.add_argument(
        "--noise_scheduler_timestep_spacing",
        type=str,
        required=False,
        help=(
            "The way the timesteps should be scaled. Refer to Table 2 of the "
            "[Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) "
            "for more information."
        )
    )
    parser.add_argument(
        "--noise_scheduler_steps_offset",
        type=int,
        default=0,
        required=False,
        help=(
            "An offset added to the inference steps. You can use a combination of `offset=1` and "
            "`set_alpha_to_one=False` to make the last step use step 0 for the previous "
            "alpha product like in Stable Diffusion."
        ),
    )
    parser.add_argument(
        "--extra_noise_scheduler_args",
        type=str,
        required=False,
        help=(
            "Optional additional arguments that are supplied to noise scheduler. The arguments should be semi-colon "
            "separated key value pairs and should be enclosed in double quotes. "
            "For example, 'clip_sample_range=1.0; clip_sample=True' for DDPMScheduler."
        )
    )

    # Offset Noise
    parser.add_argument(
        "--offset_noise",
        type=lambda x: bool(str2bool(str(x), "offset_noise")),
        required=False,
        help=(
            "Fine-tuning against a modified noise"
            " See: https://www.crosslabs.org//blog/diffusion-with-offset-noise for more information."
        )
    )

    # Rebalance the loss
    parser.add_argument(
        "--snr_gamma",
        type=float,
        default=None,
        help=(
            "SNR weighting gamma to be used if rebalancing the loss. Recommended value is 5.0. "
            "More details here: https://arxiv.org/abs/2303.09556."
        )
    )

    # Prior preservation loss
    parser.add_argument(
        "--with_prior_preservation",
        type=lambda x: bool(str2bool(str(x), "with_prior_preservation")),
        default=True,
        help="Set to True for enabling prior preservation loss."
    )
    parser.add_argument(
        "--class_prompt",
        type=str,
        default=None,
        help="The prompt to specify images in the same class as provided instance images."
    )
    parser.add_argument(
        "--num_class_images",
        type=int,
        default=100,
        help=(
            "Minimal class images for prior preservation loss. If there are not enough images already present in"
            " class_data_dir, additional images will be sampled with class_prompt."
        )
    )
    # NOTE(review): the help text below mentions an fp16 default when a GPU is
    # available, but the default registered here is "fp32" - confirm whether a
    # caller overrides it before rewording the message.
    parser.add_argument(
        "--prior_generation_precision",
        type=str,
        default="fp32",
        choices=["fp32", "fp16", "bf16"],
        help=(
            "Choose prior generation precision between fp32, fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
            " 1.10 and an Nvidia Ampere GPU.  Default to  fp16 if a GPU is available else fp32."
        )
    )
    parser.add_argument(
        "--prior_loss_weight",
        type=float,
        default=1.0,
        help="The weight of prior preservation loss."
    )

    # Validation
    parser.add_argument(
        "--validation_prompt",
        type=str,
        default=None,
        help="A prompt that is used during validation to verify that the model is learning."
    )
    parser.add_argument(
        "--num_validation_images",
        type=int,
        default=0,
        help="Number of images that should be generated during validation with `instance prompt`."
    )
    parser.add_argument(
        "--validation_steps",
        type=int,
        default=100,
        help=(
            "Run validation every X steps. Validation consists of running the prompt"
            " `args.validation_prompt` multiple times: `args.num_validation_images`"
            " and logging the images."
        )
    )
    parser.add_argument(
        "--validation_scheduler",
        type=str,
        default="DPMSolverMultistepScheduler",
        choices=("DPMSolverMultistepScheduler", "DDPMScheduler"),
        help="Select which scheduler to use for validation. DDPMScheduler is recommended for DeepFloyd IF."
    )

    return parser