def deploy_single_endpoint()

in right_size_your_sagemaker_endpoints/sagemaker_helper.py [0:0]


def deploy_single_endpoint(item, cpu_model=None, gpu_model=None):
    """
    Deploy a single SageMaker endpoint described by *item*.

    Builds an endpoint named 'endpoint-<instance-type>-x<count>' and
    deploys the CPU or GPU model to it depending on the instance family.
    If an endpoint with that name already exists, deployment is skipped.

    Inputs:
    item: dict with keys "instance_type" (e.g. "ml.m5.xlarge") and
        "instance_count" (number of instances to deploy)
    cpu_model: CPU model object, used for c/m/r instance families
    gpu_model: GPU model object, used for p/g/e/i instance families

    Output:
    endpoint_name: the derived endpoint name. NOTE: the name is returned
        even when nothing was deployed (no matching model supplied, or
        unsupported instance family) — callers should not assume the
        endpoint exists just because a name came back.
    """
    instance_type = item.get("instance_type")
    instance_count = item.get("instance_count")

    # e.g. "ml.m5.xlarge" -> "endpoint-ml-m5-xlarge-x2"
    endpoint_name = f"endpoint-{instance_type.replace('.', '-')}-x{instance_count}"

    # Skip deployment if an endpoint with this name is already up.
    if endpoint_name in get_existing_endpoints():
        print(f'\nEndpoint {endpoint_name} already exists.')
        return endpoint_name

    # Instance family token, e.g. "m5" from "ml.m5.xlarge"; its first
    # letter determines whether this is a CPU or GPU instance.
    family = instance_type.split('.')[1]
    if family.startswith(('m', 'c', 'r')):
        model, kind = cpu_model, "CPU"
    elif family.startswith(('p', 'g', 'e', 'i')):
        model, kind = gpu_model, "GPU"
    else:
        print(f"Unsupported instance type {instance_type}")
        return endpoint_name

    if model:
        print(f"\nDeploying to {endpoint_name}...")
        # Blocks until the endpoint is in service (wait=True).
        model.deploy(initial_instance_count=instance_count,
                     instance_type=instance_type,
                     endpoint_name=endpoint_name,
                     wait=True,
                     serializer=json_serializer)
    else:
        print(f"No {kind} model specified for a {kind} instance of type {instance_type}")

    return endpoint_name