in trending_deploy/deploy.py [0:0]
def deploy_model(model: Model) -> bool:
    """
    Deploy the specified model.

    Args:
        model (Model): The Model object containing model_info and viable_instance.

    Returns:
        bool: True if the model was successfully deployed, False otherwise.
    """
    try:
        model_name = model.model_info.id
        # Build a valid endpoint name: drop the namespace, replace '.' and '_'
        # with '-', cap the length at 31 characters, and lowercase.
        endpoint_name = f"{ENDPOINT_PREFIX}{model_name.split('/')[-1].replace('.', '-').replace('_', '-')}"[:31].lower()
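        # e.g. a hypothetical id "org/My_Model.v2" becomes
        # ENDPOINT_PREFIX + "my-model-v2", truncated to 31 characters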

        # Get the task from the model info
        task = model.model_info.pipeline_tag

        # Determine instance size. Note: the memory-based lookup below is
        # currently overridden, so every deployment uses DEFAULT_INSTANCE_SIZE.
        initial_memory = model.viable_instance.memory_usage_bytes
        instance_size = INSTANCE_SIZE_MAPPING.get(initial_memory, "x1")  # Default to x1
        instance_size = DEFAULT_INSTANCE_SIZE

        # Increase instance size by one notch for text-embeddings-inference.
        # With custom images for embedding models, we might not need this anymore,
        # so the upgrade call is disabled and the default size is kept for now.
        if "text-embeddings-inference" in model.model_info.tags:
            # instance_size = increase_instance_size(model, instance_size, initial_memory)
            instance_size = DEFAULT_INSTANCE_SIZE

        endpoint_kwargs = {
            "name": endpoint_name,
            "namespace": NAMESPACE,
            "repository": model_name,
            "framework": "pytorch",
            "task": task,
            "accelerator": "cpu",
            "vendor": VENDOR,
            "region": REGION,
            "type": TYPE,
            "instance_size": instance_size,  # Currently always DEFAULT_INSTANCE_SIZE (see above)
            "instance_type": DEFAULT_INSTANCE_TYPE,
            "min_replica": 1,
            "scale_to_zero_timeout": None,
            "domain": "api-inference.endpoints.huggingface.tech",
            "path": f"/models/{model_name}",
            "tags": ["auto", "api-inference"],
        }
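
        # Everything above is forwarded verbatim to create_inference_endpoint();
        # "domain" and "path" presumably publish the endpoint at the
        # api-inference URL for this model.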

        # Override the task for sentence-transformers models
        if task == "feature-extraction" and (
            any(x in model.model_info.tags for x in ["sentence-transformers", "sentence transformers"])
            or model.model_info.library_name == "sentence-transformers"
        ):
            task = "sentence-embeddings"
endpoint_kwargs["custom_image"] = {
"health_route": "/health",
"port": 5000,
"url": IMAGE
}
endpoint_kwargs["env"] = {
"API_INFERENCE_COMPAT": "true",
"HF_MODEL_DIR": "/repository",
"HF_TASK": task,
"UNLOAD_IDLE": "true",
"IDLE_TIMEOUT": "60"
}
endpoint_kwargs["task"] = task
print(f"Creating endpoint {endpoint_name} for model {model_name} with instance size {instance_size}...")
endpoint = create_inference_endpoint(**endpoint_kwargs)
print(f"Waiting for endpoint {endpoint_name} to be ready...")
# Wait for deployment (with timeout to avoid blocking indefinitely)
endpoint.wait(timeout=300)
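        # wait() raises InferenceEndpointTimeoutError if the endpoint is not
        # ready in time; the except clause below turns that into a False return.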
print(f"Endpoint {endpoint_name} for model {model_name} deployed successfully.")
add_collection_item(COLLECTION_SLUG, item_id=model_name, item_type="model")
return True
    except Exception as e:
        print(f"Error deploying model {model.model_info.id}: {e}")
        return False
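
# Usage sketch (hypothetical, not part of deploy.py): `select_models` stands in
# for whatever upstream step produces Model objects with `model_info` and
# `viable_instance` populated; only the call pattern and the bool return
# contract of deploy_model are taken from the code above.
#
#     models = select_models()
#     deployed = [m.model_info.id for m in models if deploy_model(m)]
#     print(f"Deployed {len(deployed)}/{len(models)} models")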