def parse_arguments()

in assets/aml-benchmark/components/src/aml_benchmark/inference_postprocessor/main.py [0:0]
118 lines of code
1 McCabe index (conditional complexity)

def parse_arguments() -> argparse.Namespace:
    """Parse the command-line arguments of the inference post-processor.

    Builds an ``argparse`` parser for the options defined below and parses
    ``sys.argv`` with ``parse_known_args``, so unrecognised arguments are
    silently ignored instead of causing an error.

    Only ``--prediction_dataset`` and ``--prediction_column_name`` are
    required; every other option is a string that defaults to ``None``.

    Returns:
        The namespace holding the recognised arguments.

    Raises:
        SystemExit: Raised by ``argparse`` when a required argument is
            missing or when ``--help`` is requested.
    """
    parser = argparse.ArgumentParser(description=__file__)
    parser.add_argument(
        "--prediction_dataset",
        type=str,
        required=True,
        help="Path to load the prediction dataset.",
    )
    parser.add_argument(
        "--prediction_column_name",
        type=str,
        required=True,
        help="Prediction column name.",
    )
    parser.add_argument(
        "--ground_truth_dataset",
        type=str,
        default=None,
        help="Path to load the ground truth dataset.",
    )
    parser.add_argument(
        "--ground_truth_column_name",
        type=str,
        default=None,
        help="Ground truth column name.",
    )
    parser.add_argument(
        "--additional_columns",
        type=str,
        default=None,
        help="Additional columns",
    )
    parser.add_argument(
        "--separator",
        type=str,
        default=None,
        help="Few shot separator used in prompt crafter.",
    )
    # NOTE: adjacent string literals are concatenated verbatim, so every
    # fragment of a multi-line help text (except the last) must end with a
    # space; otherwise words run together in the rendered --help output.
    parser.add_argument(
        "--find_first",
        type=str,
        default=None,
        help=(
            "A list of strings to search for in the inference results. "
            "The first occurrence of each string will be extracted. "
            "Must provide a comma-separated list of strings."
        ),
    )
    parser.add_argument(
        "--regex_expr",
        type=str,
        default=None,
        help=(
            "A regular expression to extract the answer from the inference results. "
            "The pattern must contain a group to be extracted. The first group and the "
            "first match will be used."
        ),
    )
    parser.add_argument(
        "--remove_prefixes",
        type=str,
        default=None,
        help=(
            "A set of string prefixes separated by comma list of string prefixes to be removed "
            "from the inference results in sequence. This can also be used to remove the prompt "
            "from the inference results. The prefixes should be separated by a comma."
        ),
    )
    parser.add_argument(
        "--strip_characters",
        type=str,
        default=None,
        help=(
            "A set of characters to remove from the beginning or end of the extracted answer. "
            "It is applied in the very end of the extraction process."
        ),
    )
    parser.add_argument(
        "--extract_number",
        type=str,
        default=None,
        help=(
            "If the inference results contain a number, this can be used to extract the first or last "
            "number in the inference results. The number will be extracted as a string."
        ),
    )
    parser.add_argument(
        "--label_map",
        type=str,
        default=None,
        help=(
            "JSON serialized dictionary to perform mapping. Must contain key-value pair "
            '"column_name": "<actual_column_name>" whose value needs mapping, followed by '
            "key-value pairs containing idtolabel or labeltoid mappers. "
            'Example: {"column_name":"label", "0":"NEUTRAL", "1":"ENTAILMENT", "2":"CONTRADICTION"}. '
            "This is not applicable to custom scripts."
        ),
    )
    parser.add_argument(
        "--template",
        type=str,
        default=None,
        help="Jinja template containing the extraction logic of inference post-processing.",
    )
    parser.add_argument(
        "--script_path",
        type=str,
        default=None,
        help="Path to the custom inference post-processor python script.",
    )
    parser.add_argument(
        "--output_dataset_result",
        type=str,
        default=None,
        help="Path to the jsonl file where the processed data will be saved.",
    )
    # parse_known_args tolerates extra arguments; the list of unknown
    # arguments it returns is intentionally discarded.
    args, _ = parser.parse_known_args()
    return args