def get_parser()

in assets/training/finetune_acft_multimodal/src/preprocess/preprocess.py [0:0]
85 lines of code
1 McCabe index (conditional complexity)

def get_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for the preprocessing step.

    The parser is created with ``allow_abbrev=False``, so every option must be
    spelled out in full on the command line. ``--train_mltable_path``,
    ``--validation_mltable_path``, ``--label_column`` and ``--image_column``
    are required; every other option has a default (``--test_mltable_path``
    falls back to argparse's implicit ``None``).

    Returns:
        The configured parser. Nothing is parsed here; callers invoke
        ``parse_args`` / ``parse_known_args`` themselves.
    """
    parser = argparse.ArgumentParser(description="Tabular Preprocessing", allow_abbrev=False)

    # Help-text fragments shared by the column-list options below, kept in one
    # place so the wording cannot drift between options.
    column_list_hint = "Should be a comma-separated list of column names. Example: 'column_1,column_2'"
    override_note = (
        "This setting would override the column types detected from automatic column purpose detection. "
        + column_list_hint
    )

    parser.add_argument(
        "--problem_type",
        default=ProblemType.SINGLE_LABEL_CLASSIFICATION,
        type=str,
        help="Whether its single label or multilabel classification",
    )

    # Input datasets
    parser.add_argument(
        "--train_mltable_path",
        type=str,
        required=True,
        help="Train mltable path",
    )
    parser.add_argument(
        "--validation_mltable_path",
        type=str,
        required=True,
        help="Validation mltable path",
    )
    parser.add_argument(
        "--test_mltable_path",
        type=str,
        required=False,
        help="Test mltable path",
    )

    parser.add_argument(
        "--model_selector_output",
        default=None,
        type=str,
        help=(
            # The trailing space matters: adjacent literals are concatenated
            # verbatim, and without it the two sentences run together.
            "output folder of model selector containing model configs, tokenizer, checkpoints in case of model_id. "
            "If huggingface_id is selected, the model download happens dynamically on the fly"
        ),
    )

    parser.add_argument(
        "--output_dir",
        default="preprocess_output",
        type=str,
        help="folder to store model selector output and metadata for preprocessed input data",
    )

    # Tabular preprocessor settings
    parser.add_argument(
        "--label_column",
        type=str,
        required=True,
        help="Target label column name",
    )
    parser.add_argument(
        "--image_column",
        type=str,
        required=True,
        help="Image column name",
    )
    parser.add_argument(
        "--drop_columns",
        type=str,
        default="",
        help="Columns to ignore in the input data. " + column_list_hint,
    )
    parser.add_argument(
        "--numerical_columns_overrides",
        type=str,
        default="",
        help="Columns to treat as numerical in the input data. " + override_note,
    )
    parser.add_argument(
        "--categorical_columns_overrides",
        type=str,
        default="",
        help="Columns to treat as categorical in the input data. " + override_note,
    )
    parser.add_argument(
        "--text_columns_overrides",
        type=str,
        default="",
        help="Columns to treat as text in the input data. " + override_note,
    )

    return parser