Symbol: `parse_args()` — defined in `assets/common/src/deploy.py`.

def parse_args():
    """Parse and validate command-line arguments for the deployment component.

    Returns:
        argparse.Namespace: Parsed arguments describing the model, endpoint,
        deployment settings, probe configuration, and output file paths.

    Raises:
        ValueError: If ``max_concurrent_requests_per_instance`` is less than 1,
            or if ``request_timeout_ms`` / ``max_queue_wait_ms`` fall outside
            the range [1, MAX_REQUEST_TIMEOUT].
    """
    parser = argparse.ArgumentParser()

    # Defaults for the managed online endpoint have been picked mostly from:
    # https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-deployment-managed-online
    # Some of the defaults have been tweaked to cater to large models.

    # add arguments
    parser.add_argument(
        "--registration_details_folder",
        type=Path,
        help="Folder containing model registration details in a JSON file named model_registration_details.json",
    )
    parser.add_argument(
        "--model_id",
        type=str,
        help="Registered mlflow model id",
    )
    parser.add_argument(
        "--inference_payload",
        type=Path,
        help="Json file with inference endpoint payload.",
    )
    parser.add_argument(
        "--endpoint_name",
        type=str,
        help="Name of the endpoint",
    )
    parser.add_argument("--deployment_name", type=str, help="Name of the deployment")
    parser.add_argument(
        "--instance_type",
        type=str,
        help="Compute instance type to deploy model",
        default="Standard_NC24s_v3",
    )
    parser.add_argument(
        "--instance_count",
        type=int,
        help="Number of compute instances to deploy model",
        default=1,
        # +1 so the maximum instance count itself is a valid choice, consistent
        # with the inclusive upper-bound checks on the *_ms args below.
        choices=range(1, MAX_INSTANCE_COUNT + 1),
    )
    parser.add_argument(
        "--max_concurrent_requests_per_instance",
        type=int,
        default=1,
        help="Maximum concurrent requests to be handled per instance",
    )
    parser.add_argument(
        "--request_timeout_ms",
        type=int,
        default=60000,  # 1min
        help="Request timeout in ms.",
    )
    parser.add_argument(
        "--max_queue_wait_ms",
        type=int,
        default=60000,  # 1min
        help="Maximum queue wait time of a request in ms",
    )
    parser.add_argument(
        "--failure_threshold_readiness_probe",
        type=int,
        default=10,
        help="No of times system will try after failing the readiness probe",
    )
    parser.add_argument(
        "--success_threshold_readiness_probe",
        type=int,
        default=1,
        help="The minimum consecutive successes for the readiness probe to be considered successful, after fail",
    )
    parser.add_argument(
        "--timeout_readiness_probe",
        type=int,
        default=10,
        help="The number of seconds after which the readiness probe times out",
    )
    parser.add_argument(
        "--period_readiness_probe",
        type=int,
        default=10,
        help="How often (in seconds) to perform the readiness probe",
    )
    parser.add_argument(
        "--initial_delay_readiness_probe",
        type=int,
        default=10,
        help="The number of seconds after the container has started before the readiness probe is initiated",
    )
    parser.add_argument(
        "--failure_threshold_liveness_probe",
        type=int,
        default=30,
        help="No of times system will try after failing the liveness probe",
    )
    parser.add_argument(
        "--timeout_liveness_probe",
        type=int,
        default=10,
        help="The number of seconds after which the liveness probe times out",
    )
    parser.add_argument(
        "--period_liveness_probe",
        type=int,
        default=10,
        help="How often (in seconds) to perform the liveness probe",
    )
    parser.add_argument(
        "--initial_delay_liveness_probe",
        type=int,
        default=10,
        help="The number of seconds after the container has started before the liveness probe is initiated",
    )
    parser.add_argument(
        "--egress_public_network_access",
        type=str,
        default="enabled",
        help="Secures the deployment by restricting interaction between deployment and Azure resources used by it",
    )
    parser.add_argument(
        "--model_deployment_details",
        type=str,
        help="Json file to which deployment details will be written",
    )
    # parse args
    args = parser.parse_args()
    logger.info(f"Args received {args}")
    # Also echo to stdout so the values surface in plain job/console logs.
    print("args received ", args)

    # Validate inter-dependent / range-constrained input values. ValueError
    # (a subclass of Exception) keeps existing broad handlers working while
    # signalling the failure category precisely.
    if args.max_concurrent_requests_per_instance < 1:
        raise ValueError("Arg max_concurrent_requests_per_instance cannot be less than 1")
    if args.request_timeout_ms < 1 or args.request_timeout_ms > MAX_REQUEST_TIMEOUT:
        raise ValueError(f"Arg request_timeout_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}")
    if args.max_queue_wait_ms < 1 or args.max_queue_wait_ms > MAX_REQUEST_TIMEOUT:
        raise ValueError(f"Arg max_queue_wait_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}")

    return args