in assets/training/finetune_acft_multimodal/src/preprocess/preprocess.py [0:0]
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the preprocessing step.

    The parser is created with ``allow_abbrev=False``, so long options must be
    spelled out in full on the command line.

    Arguments registered, in order:

    * ``--problem_type``: string, defaults to
      ``ProblemType.SINGLE_LABEL_CLASSIFICATION``.
    * ``--train_mltable_path`` / ``--validation_mltable_path``: required strings.
    * ``--test_mltable_path``: optional string (``None`` when omitted).
    * ``--model_selector_output``: optional string (``None`` when omitted).
    * ``--output_dir``: string, defaults to ``"preprocess_output"``.
    * ``--label_column`` / ``--image_column``: required strings.
    * ``--drop_columns`` and the three ``--*_columns_overrides`` options:
      strings defaulting to ``""``; the help text describes them as
      comma-separated lists of column names. This function does not split
      them; they are returned to the caller as raw strings.

    Returns:
        The configured ``argparse.ArgumentParser``; no arguments are parsed here.
    """
    parser = argparse.ArgumentParser(description="Tabular Preprocessing", allow_abbrev=False)

    parser.add_argument(
        "--problem_type",
        default=ProblemType.SINGLE_LABEL_CLASSIFICATION,
        type=str,
        help="Whether its single label or multilabel classification",
    )

    # Dataset locations: train and validation are mandatory, test is optional.
    parser.add_argument(
        "--train_mltable_path",
        type=str,
        required=True,
        help="Train mltable path",
    )
    parser.add_argument(
        "--validation_mltable_path",
        type=str,
        required=True,
        help="Validation mltable path",
    )
    parser.add_argument(
        "--test_mltable_path",
        type=str,
        required=False,
        help="Test mltable path",
    )

    parser.add_argument(
        "--model_selector_output",
        default=None,
        type=str,
        # Adjacent string literals are concatenated with no separator, so the
        # first fragment must end with a space to keep the sentences apart.
        help=(
            "output folder of model selector containing model configs, tokenizer, checkpoints in case of model_id. "
            "If huggingface_id is selected, the model download happens dynamically on the fly"
        ),
    )
    parser.add_argument(
        "--output_dir",
        default="preprocess_output",
        type=str,
        help="folder to store model selector output and metadata for preprocessed input data",
    )

    # Tabular preprocessor settings
    parser.add_argument(
        "--label_column",
        type=str,
        required=True,
        help="Target label column name",
    )
    parser.add_argument(
        "--image_column",
        type=str,
        required=True,
        help="Image column name",
    )
    parser.add_argument(
        "--drop_columns",
        type=str,
        default="",
        help="Columns to ignore in the input data. Should be a comma-separated list of column names. "
        "Example: 'column_1,column_2'",
    )

    # The three column-type override options share the same help wording and
    # differ only in the column kind, so the text is produced from one template.
    override_help = (
        "Columns to treat as {kind} in the input data. This setting would override the column types detected "
        "from automatic column purpose detection. Should be a comma-separated list of column names. "
        "Example: 'column_1,column_2'"
    )
    parser.add_argument(
        "--numerical_columns_overrides",
        type=str,
        default="",
        help=override_help.format(kind="numerical"),
    )
    parser.add_argument(
        "--categorical_columns_overrides",
        type=str,
        default="",
        help=override_help.format(kind="categorical"),
    )
    parser.add_argument(
        "--text_columns_overrides",
        type=str,
        default="",
        help=override_help.format(kind="text"),
    )
    return parser