in right_size_your_sagemaker_endpoints/sagemaker_helper.py [0:0]
def deploy_single_endpoint(item, cpu_model=None, gpu_model=None):
    """
    Deploy a single SageMaker endpoint for one instance-type/count entry.

    The endpoint is named 'endpoint-<type>-x<count>' (dots in the instance
    type replaced by dashes). If an endpoint with that name already exists,
    no deployment is attempted.

    Args:
        item: dict with keys "instance_type" (e.g. "ml.m5.xlarge") and
            "instance_count" (int number of instances).
        cpu_model: SageMaker model to deploy on CPU instance families
            (m/c/r), if the model supports CPU.
        gpu_model: SageMaker model to deploy on GPU/accelerated instance
            families (p/g/e/i), if the model supports GPU.

    Returns:
        str: the endpoint name, whether or not a deployment was started
        (also returned for already-existing or unsupported/skipped cases).
    """
    instance_type = item.get("instance_type")
    instance_count = item.get("instance_count")
    endpoint_name = f"endpoint-{instance_type.replace('.', '-')}-x{instance_count}"

    # Skip deployment if an endpoint with this name already exists.
    if endpoint_name in get_existing_endpoints():
        print(f'\nEndpoint {endpoint_name} already exists.')
        return endpoint_name

    # Instance family token, e.g. "ml.m5.xlarge" -> "m5"; its first letter
    # determines CPU vs GPU (assumes the "ml.<family>.<size>" format).
    family = instance_type.split('.')[1]
    if family.startswith(('m', 'c', 'r')):
        model, kind = cpu_model, "CPU"
    elif family.startswith(('p', 'g', 'e', 'i')):
        model, kind = gpu_model, "GPU"
    else:
        print(f"Unsupported instance type {instance_type}")
        return endpoint_name

    if model:
        print(f"\nDeploying to {endpoint_name}...")
        # Blocking deploy (wait=True); single shared call for CPU and GPU.
        model.deploy(initial_instance_count=instance_count,
                     instance_type=instance_type,
                     endpoint_name=endpoint_name,
                     wait=True,
                     serializer=json_serializer)
    else:
        print(f"No {kind} model specified for a {kind} instance of type {instance_type}")

    return endpoint_name