in assets/common/src/deploy.py [0:0]
def parse_args():
    """Parse and validate command-line arguments for a managed online deployment.

    Defaults for the managed online endpoint have been picked mostly from:
    https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-deployment-managed-online
    Some of the defaults have been tweaked to cater to large models.

    Returns:
        argparse.Namespace: the parsed arguments.

    Raises:
        Exception: if max_concurrent_requests_per_instance is below 1, or if
            request_timeout_ms / max_queue_wait_ms fall outside
            [1, MAX_REQUEST_TIMEOUT].
    """
    parser = argparse.ArgumentParser()
    # add arguments
    parser.add_argument(
        "--registration_details_folder",
        type=Path,
        help="Folder containing model registration details in a JSON file named model_registration_details.json",
    )
    parser.add_argument(
        "--model_id",
        type=str,
        help="Registered mlflow model id",
    )
    parser.add_argument(
        "--inference_payload",
        type=Path,
        help="Json file with inference endpoint payload.",
    )
    parser.add_argument(
        "--endpoint_name",
        type=str,
        help="Name of the endpoint",
    )
    parser.add_argument("--deployment_name", type=str, help="Name of the deployment")
    parser.add_argument(
        "--instance_type",
        type=str,
        help="Compute instance type to deploy model",
        default="Standard_NC24s_v3",
    )
    parser.add_argument(
        "--instance_count",
        type=int,
        help="Number of compute instances to deploy model",
        default=1,
        # range() excludes its stop value, so +1 keeps MAX_INSTANCE_COUNT itself
        # a legal choice (the original range(1, MAX_INSTANCE_COUNT) silently
        # rejected the maximum).
        choices=range(1, MAX_INSTANCE_COUNT + 1),
    )
    parser.add_argument(
        "--max_concurrent_requests_per_instance",
        type=int,
        default=1,
        help="Maximum concurrent requests to be handled per instance",
    )
    parser.add_argument(
        "--request_timeout_ms",
        type=int,
        default=60000,  # 1min
        help="Request timeout in ms.",
    )
    parser.add_argument(
        "--max_queue_wait_ms",
        type=int,
        default=60000,  # 1min
        help="Maximum queue wait time of a request in ms",
    )
    parser.add_argument(
        "--failure_threshold_readiness_probe",
        type=int,
        default=10,
        help="No of times system will try after failing the readiness probe",
    )
    parser.add_argument(
        "--success_threshold_readiness_probe",
        type=int,
        default=1,
        help="The minimum consecutive successes for the readiness probe to be considered successful, after fail",
    )
    parser.add_argument(
        "--timeout_readiness_probe",
        type=int,
        default=10,
        help="The number of seconds after which the readiness probe times out",
    )
    parser.add_argument(
        "--period_readiness_probe",
        type=int,
        default=10,
        help="How often (in seconds) to perform the readiness probe",
    )
    parser.add_argument(
        "--initial_delay_readiness_probe",
        type=int,
        default=10,
        help="The number of seconds after the container has started before the readiness probe is initiated",
    )
    parser.add_argument(
        "--failure_threshold_liveness_probe",
        type=int,
        default=30,
        help="No of times system will try after failing the liveness probe",
    )
    parser.add_argument(
        "--timeout_liveness_probe",
        type=int,
        default=10,
        help="The number of seconds after which the liveness probe times out",
    )
    parser.add_argument(
        "--period_liveness_probe",
        type=int,
        default=10,
        help="How often (in seconds) to perform the liveness probe",
    )
    parser.add_argument(
        "--initial_delay_liveness_probe",
        type=int,
        default=10,
        help="The number of seconds after the container has started before the liveness probe is initiated",
    )
    parser.add_argument(
        "--egress_public_network_access",
        type=str,
        default="enabled",
        help="Secures the deployment by restricting interaction between deployment and Azure resources used by it",
    )
    parser.add_argument(
        "--model_deployment_details",
        type=str,
        help="Json file to which deployment details will be written",
    )
    # parse args
    args = parser.parse_args()
    # Single structured log entry; the previous duplicate print() of the same
    # namespace was removed to avoid double-reporting the arguments.
    logger.info(f"Args received {args}")
    # Validating passed input values
    if args.max_concurrent_requests_per_instance < 1:
        raise Exception("Arg max_concurrent_requests_per_instance cannot be less than 1")
    if args.request_timeout_ms < 1 or args.request_timeout_ms > MAX_REQUEST_TIMEOUT:
        raise Exception(f"Arg request_timeout_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}")
    if args.max_queue_wait_ms < 1 or args.max_queue_wait_ms > MAX_REQUEST_TIMEOUT:
        raise Exception(f"Arg max_queue_wait_ms should lie between 1 and {MAX_REQUEST_TIMEOUT}")
    return args