def get_parser()

in assets/training/finetune_acft_image/src/finetune/finetune.py [0:0]
436 lines of code
19 McCabe index (conditional complexity)

def get_parser():
    """Get the parser object."""
    parser = argparse.ArgumentParser(
        description="Image Tasks which include Image classification, object detection and instance segmentation."
    )

    # # component input: model path from model_selector component
    parser.add_argument(
        "--model_path",
        default=None,
        type=str,
        help=(
            "output folder of model selector containing model configs, checkpoints in case"
            "model_path is provided as input to model selector component."
            "If model_name is provided as input to model selector component,"
            "the model download happens dynamically on the fly."
        )
    )

    # # Component input: Training and validation dataset
    parser.add_argument(
        "--train_mltable_path",
        type=str,
        required=True,
        help="Path to the mltable of the training dataset."
    )
    parser.add_argument(
        "--valid_mltable_path",
        type=str,
        default=None,
        help="Path to the mltable of the validation dataset."
    )

    # # Image height and width
    parser.add_argument(
        "--image_width",
        type=int,
        default=-1,
        help=(
            "Final Image width after augmentation that is input to the network."
            "Default value is -1 which means it would be overwritten by default image "
            "width in Hugging Face feature extractor. If either image_width or image_height "
            "is set to -1, default value would be used for both width and height."
        )
    )
    parser.add_argument(
        "--image_height",
        type=int,
        default=-1,
        help=(
            "Final Image width after augmentation that is input to the network."
            "Default value is -1 which means it would be overwritten by default image "
            "width in Hugging Face feature extractor. If either image_width or image_height "
            "is set to -1, default value would be used for both width and height."
        )
    )

    # Image min_size and max_size. Only applicable for OD and IS.
    parser.add_argument(
        "--image_min_size",
        type=int,
        help=(
            "Minimum image size after augmentation that is input to the network. Default "
            "is -1 which means it would be overwritten by image_scale in model config. "
            "The image will be rescaled as large as possible within "
            "the range [image_min_size, image_max_size]. "
            "The image size will be constraint so that the max edge is no longer than "
            "image_max_size and short edge is no longer than image_min_size."
        )
    )
    parser.add_argument(
        "--image_max_size",
        type=int,
        help=(
            "Maximum image size after augmentation that is input to the network. Default "
            "is -1 which means it would be overwritten by image_scale in model config. "
            "The image will be rescaled as large as possible within "
            "the range [image_min_size, image_max_size]. "
            "The image size will be constraint so that the max edge is no longer than "
            "image_max_size and short edge is no longer than image_min_size."
        )
    )

    # # Task name
    parser.add_argument(
        "--task_name",
        type=str,
        choices=(
            Tasks.HF_MULTI_CLASS_IMAGE_CLASSIFICATION,
            Tasks.HF_MULTI_LABEL_IMAGE_CLASSIFICATION,
            Tasks.MM_OBJECT_DETECTION,
            Tasks.MM_INSTANCE_SEGMENTATION,
            Tasks.MM_MULTI_OBJECT_TRACKING,
            Tasks.HF_SD_TEXT_TO_IMAGE,
        ),
        required=True,
        help="Name of the task the model is solving."
    )

    # # Apply augmentaions
    parser.add_argument(
        "--apply_augmentations",
        type=lambda x: bool(str2bool(str(x), "apply_augmentations")),
        default=False,
        help="If set to true, will enable augmentations for training"
    )

    # # Data loader num workers
    parser.add_argument(
        "--dataloader_num_workers",
        type=int,
        default=8,
        help=(
            "Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be "
            "loaded in the main process."
        )
    )

    # # Deepspeed
    parser.add_argument(
        "--apply_deepspeed",
        type=lambda x: bool(str2bool(str(x), "apply_deepspeed")),
        help=(
            "If set to true, will enable deepspeed for training. "
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    # optional component input: deepspeed config json
    # core is using this parameter to check if deepspeed is enabled
    parser.add_argument(
        "--deepspeed_config",
        type=str,
        default=None,
        help="Deepspeed config to be used for finetuning"
    )

    # # ORT
    parser.add_argument(
        "--apply_ort",
        type=lambda x: bool(str2bool(str(x), "apply_ort")),
        help=(
            "If set to true, will enable Onnxruntime for training. "
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # LORA
    # Lora is not supported for vision models currently.
    # So, this parameter is not exposed via yaml
    parser.add_argument(
        "--apply_lora",
        type=lambda x: bool(str2bool(str(x), "apply_lora")),
        default=False,
        help="If set to true, will enable LORA for training."
    )
    parser.add_argument("--lora_alpha", type=int, default=128, help="LORA attention alpha")
    parser.add_argument("--lora_dropout", type=float, default=0.0, help="LORA dropout value")
    parser.add_argument("--lora_r", default=8, type=int, help="LORA dimension")

    # # Epochs and steps
    parser.add_argument(
        "--num_train_epochs",
        type=int,
        help=(
            "Number of training epochs."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    parser.add_argument(
        "--max_steps",
        type=int,
        help=(
            "If set to a positive number, the total number of training steps to perform. Overrides `num_train_epochs`."
            "In case of using a finite iterable dataset the training may stop before reaching the set number of steps"
            "when all data is exhausted."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # Batch size
    parser.add_argument(
        "--per_device_train_batch_size",
        type=int,
        help=(
            "Train batch size. If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    parser.add_argument(
        "--per_device_eval_batch_size",
        type=int,
        help=(
            "Validation batch size."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    parser.add_argument(
        "--auto_find_batch_size",
        type=lambda x: bool(str2bool(str(x), "auto_find_batch_size")),
        default=False,
        help=(
            "Flag to enable auto finding of batch size. If the provided `train_batch_size` goes into Out Of Memory"
            " (OOM)enabling auto_find_batch_size will find the correct batch size by iteratively reducing"
            " `train_batch_size` by afactor of 2 till the OOM is fixed."
        )
    )

    # # optimizer
    parser.add_argument(
        "--optim",
        choices=(
            IncomingOptimizerNames.ADAMW_HF,
            IncomingOptimizerNames.ADAMW_TORCH,
            # # # Todo: enable or take them out post testing.
            # IncomingOptimizerNames.ADAMW_TORCH_XLA,
            # IncomingOptimizerNames.ADAMW_APEX_FUSED,
            # IncomingOptimizerNames.ADAMW_BNB,
            # IncomingOptimizerNames.ADAMW_ANYPRECISION,
            IncomingOptimizerNames.SGD,
            IncomingOptimizerNames.ADAFACTOR,
            IncomingOptimizerNames.ADAGRAD,
            IncomingOptimizerNames.ADAMW_ORT_FUSED,
        ),
        type=str,
        help=(
            "Optimizer to be used while training."
            f"'{IncomingOptimizerNames.ADAMW_ORT_FUSED}' is only supported for ORT training."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    parser.add_argument(
        "--weight_decay",
        type=float,
        help=(
            "The weight decay to apply (if not zero) to all layers except all "
            "bias and LayerNorm weights in AdamW optimizer."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    parser.add_argument(
        "--extra_optim_args",
        default="",
        type=str,
        help=(
            "Optional additional arguments that are supplied to SGD Optimizer."
            "The arguments should be semi-colon separated key value pairs. "
            "For example, 'momentum=0.5; nesterov=True' for sgd"
            "Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names"
            "please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD."
            "Parameters supplied in extra_optim_args will take precedence over the parameter supplied via"
            "other arguments such as weight_decay. If weight_decay is provided via 'weight_decay'"
            "parameter and via extra_optim_args both, values specified in extra_optim_args will be used."
        )
    )
    # # Learning rate
    parser.add_argument(
        "--learning_rate",
        type=float,
        help=(
            "Start learning rate. Defaults to 5e-05."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # Learning rate scheduler
    parser.add_argument(
        "--lr_scheduler_type",
        choices=(
            IncomingLearingScheduler.WARMUP_LINEAR,
            IncomingLearingScheduler.WARMUP_COSINE,
            IncomingLearingScheduler.WARMUP_COSINE_WITH_RESTARTS,
            IncomingLearingScheduler.WARMUP_POLYNOMIAL,
            IncomingLearingScheduler.CONSTANT,
            IncomingLearingScheduler.WARMUP_CONSTANT,
            IncomingLearingScheduler.STEP,
            # Step is not supported for FT-components. We are only accepting step as a parameter to show
            # a better warning message to the user.
            # "Step" LR scheduler is supported for runtime component therefore if user is using "Step" scheduler
            # for the uber component, the FT-component might get this value and
            # we will warn the user to use a different scheduler.
        ),
        type=str,
        help=(
            "The scheduler type to use."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    parser.add_argument(
        "--warmup_steps",
        type=int,
        help=(
            "Number of steps used for a linear warmup from 0 to learning_rate."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # Gradient accumulation steps
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        help=(
            "Number of updates steps to accumulate the gradients for, before performing a backward/update pass."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # mixed precision training
    parser.add_argument(
        "--precision",
        type=int,
        default=32,
        choices=(16, 32),
        help=(
            "Apply mixed precision training. "
            "This can reduce memory footprint by performing operations in half-precision."
        )
    )

    # # random seed
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed that will be set at the beginning of training."
    )

    # # evaluation
    parser.add_argument(
        "--eval_strategy",
        type=str,
        choices=(
            IntervalStrategy.NO,
            IntervalStrategy.STEPS,
            IntervalStrategy.EPOCH,
        ),
        default="epoch",
        help="The evaluation strategy to adopt during training."
    )
    parser.add_argument(
        "--eval_steps",
        type=int,
        default=500,
        help="Number of update steps between two evals if evaluation_strategy='steps'."
    )

    # # logging
    parser.add_argument(
        "--logging_strategy",
        type=str,
        choices=(
            IntervalStrategy.NO,
            IntervalStrategy.STEPS,
            IntervalStrategy.EPOCH,
        ),
        default="epoch",
        help="The logging strategy to adopt during training."
    )
    parser.add_argument(
        "--logging_steps",
        type=int,
        default=500,
        help="Number of update steps between two logs if logging_strategy='steps'."
    )

    # # save strategy
    parser.add_argument(
        "--save_strategy",
        type=str,
        choices=(
            IntervalStrategy.NO,
            IntervalStrategy.STEPS,
            IntervalStrategy.EPOCH,
        ),
        default="epoch",
        help="The checkpoint save strategy to adopt during training."
    )
    parser.add_argument(
        "--save_steps",
        type=int,
        default=500,
        help="Number of updates steps before two checkpoint saves if save_strategy='steps'."
    )

    # # checkpoint saving limit
    parser.add_argument(
        "--save_total_limit",
        type=int,
        default=-1,
        help=(
            "If a value is passed, will limit the total amount of checkpoints. "
            "Deletes the older checkpoints in output_dir. "
            "If the value is -1 saves all checkpoints."
        )
    )

    # # metrics for model
    parser.add_argument(
        "--metric_for_best_model",
        type=str,
        choices=(
            "loss",
            # Classification - multiclass
            metrics_constants.F1Macro,
            metrics_constants.Accuracy,
            metrics_constants.PrecisionMacro,
            metrics_constants.RecallMacro,
            # Classification - multilabel
            metrics_constants.IOU,
            metrics_constants.IOUMacro,
            metrics_constants.IOUMicro,
            metrics_constants.IOUWeighted,
            # Object detectaion and instance segmentation
            metrics_constants.MEAN_AVERAGE_PRECISION,
            metrics_constants.PRECISION,
            metrics_constants.RECALL,
            metrics_constants.MOTA,
            metrics_constants.MOTP,
            metrics_constants.IDF1,
            metrics_constants.IDSW,
        ),
        help=(
            "Specify the metric to use to compare two different models."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )
    # label smoothing factor
    parser.add_argument(
        "--label_smoothing_factor",
        type=float,
        help=(
            "The label smoothing factor to use in range [0.0, 1,0). Zero means no label smoothing, "
            "otherwise the underlying onehot-encoded labels are changed from 0s and 1s to "
            "label_smoothing_factor/num_labels and "
            "1 - label_smoothing_factor + label_smoothing_factor/num_labels respectively."
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # to resume training from a model given in folder, loading older states etc.
    parser.add_argument(
        "--resume_from_checkpoint",
        type=lambda x: bool(str2bool(str(x), "resume_from_checkpoint")),
        default=False,
        help="Loads Optimizer, Scheduler and Trainer state for finetuning if true."
    )

    # # early stopping - enabled through a callback ?
    parser.add_argument(
        "--apply_early_stopping",
        type=lambda x: bool(str2bool(str(x), "apply_early_stopping")),
        default=False,
        help="Enable early stopping."
    )
    parser.add_argument(
        "--early_stopping_patience",
        type=int,
        default=1,
        help="Stop training when the specified metric worsens for early_stopping_patience evaluation calls."
    )
    parser.add_argument(
        "--early_stopping_threshold",
        type=float,
        default=0.0,
        help="Denotes how much the specified metric must improve to satisfy early stopping conditions."
    )

    # Gradient norm
    parser.add_argument(
        "--max_grad_norm",
        type=float,
        help=(
            "Maximum gradient norm (for gradient clipping)"
            "If left empty, will be chosen automatically based on the task type and model selected."
        )
    )

    # # Model saving - will always be set to True for vision models
    parser.add_argument(
        "--save_as_mlflow_model",
        type=lambda x: bool(str2bool(str(x), "save_as_mlflow_model")),
        default=True,
        help="Save as mlflow model with pyfunc as flavour."
    )

    # # component output: output dir for mlflow model
    parser.add_argument(
        "--mlflow_model_folder",
        default=SettingParameters.DEFAULT_MLFLOW_OUTPUT,
        type=str,
        help="Output dir to save the finetune model as mlflow model."
    )
    parser.add_argument(
        "--pytorch_model_folder",
        default=SettingParameters.DEFAULT_PYTORCH_OUTPUT,
        type=str,
        help="Output dir to save the finetune model as pytorch model."
    )

    # ############### MMDetection specific args #################### #
    parser.add_argument(
        "--iou_threshold",
        type=float,
        help="IOU threshold used during inference in non-maximum suppression post processing."
    )
    parser.add_argument(
        "--box_score_threshold",
        type=float,
        help=(
            "During inference, only return proposals with a score greater than `box_score_threshold`. "
            "The score is the multiplication of the objectness score and classification probability."
        )
    )

    return parser