in trending_deploy/deploy.py [0:0]
def deploy_model(model: Model) -> bool:
    """
    Deploy the specified model.

    Args:
        model (Model): The Model object containing model_info and viable_instance.

    Returns:
        bool: True if the model was successfully deployed, False otherwise.
    """
    try:
        model_name = model.model_info.id
        # Build a valid endpoint name: drop the namespace, replace '.' and '_'
        # with '-', cap the length at 31 characters, and lowercase.
        endpoint_name = f"{ENDPOINT_PREFIX}{model_name.split('/')[-1].replace('.', '-').replace('_', '-')}"[:31].lower()
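        # e.g. a hypothetical id "org/My_Model.v2" becomes
        # ENDPOINT_PREFIX + "my-model-v2", truncated to 31 characters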

        # Get the task from the model info
        task = model.model_info.pipeline_tag

        # Determine instance size. Note: the memory-based lookup below is
        # currently overridden, so every deployment uses DEFAULT_INSTANCE_SIZE.
        initial_memory = model.viable_instance.memory_usage_bytes
        instance_size = INSTANCE_SIZE_MAPPING.get(initial_memory, "x1")  # Default to x1
        instance_size = DEFAULT_INSTANCE_SIZE

        # Increase instance size by one notch for text-embeddings-inference.
        # With custom images for embedding models, we might not need this anymore,
        # so the upgrade call is disabled and the default size is kept for now.
        if "text-embeddings-inference" in model.model_info.tags:
            # instance_size = increase_instance_size(model, instance_size, initial_memory)
            instance_size = DEFAULT_INSTANCE_SIZE

        endpoint_kwargs = {
            "name": endpoint_name,
            "namespace": NAMESPACE,
            "repository": model_name,
            "framework": "pytorch",
            "task": task,
            "accelerator": "cpu",
            "vendor": VENDOR,
            "region": REGION,
            "type": TYPE,
            "instance_size": instance_size,  # Currently always DEFAULT_INSTANCE_SIZE (see above)
            "instance_type": DEFAULT_INSTANCE_TYPE,
            "min_replica": 1,
            "scale_to_zero_timeout": None,
            "domain": "api-inference.endpoints.huggingface.tech",
            "path": f"/models/{model_name}",
            "tags": ["auto", "api-inference"],
        }
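
        # Everything above is forwarded verbatim to create_inference_endpoint();
        # "domain" and "path" presumably publish the endpoint at the
        # api-inference URL for this model.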

        # Override the task for sentence-transformers models
        if task == "feature-extraction" and (
            any(x in model.model_info.tags for x in ["sentence-transformers", "sentence transformers"])
            or model.model_info.library_name == "sentence-transformers"
        ):
            task = "sentence-embeddings"
endpoint_kwargs["custom_image"] = {
"health_route": "/health",
"port": 5000,
"url": IMAGE
}
endpoint_kwargs["env"] = {
"API_INFERENCE_COMPAT": "true",
"HF_MODEL_DIR": "/repository",
"HF_TASK": task,
"UNLOAD_IDLE": "true",
"IDLE_TIMEOUT": "60"
}
endpoint_kwargs["task"] = task
print(f"Creating endpoint {endpoint_name} for model {model_name} with instance size {instance_size}...")
endpoint = create_inference_endpoint(**endpoint_kwargs)
print(f"Waiting for endpoint {endpoint_name} to be ready...")
# Wait for deployment (with timeout to avoid blocking indefinitely)
endpoint.wait(timeout=300)
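        # wait() raises InferenceEndpointTimeoutError if the endpoint is not
        # ready in time; the except clause below turns that into a False return.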
print(f"Endpoint {endpoint_name} for model {model_name} deployed successfully.")
add_collection_item(COLLECTION_SLUG, item_id=model_name, item_type="model")
return True
    except Exception as e:
        print(f"Error deploying model {model.model_info.id}: {e}")
        return False
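
# Usage sketch (hypothetical, not part of deploy.py): `select_models` stands in
# for whatever upstream step produces Model objects with `model_info` and
# `viable_instance` populated; only the call pattern and the bool return
# contract of deploy_model are taken from the code above.
#
#     models = select_models()
#     deployed = [m.model_info.id for m in models if deploy_model(m)]
#     print(f"Deployed {len(deployed)}/{len(models)} models")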