dell_ai/models.py

"""Model-related functionality for the Dell AI SDK.""" from typing import TYPE_CHECKING, Dict, List, Optional from pydantic import BaseModel, Field, field_validator from dell_ai import constants from dell_ai.exceptions import ( ResourceNotFoundError, ValidationError, ) if TYPE_CHECKING: from dell_ai.client import DellAIClient class ModelConfig(BaseModel): """Configuration details for a model deployment.""" max_batch_prefill_tokens: Optional[int] = None max_input_tokens: Optional[int] = None max_total_tokens: Optional[int] = None num_gpus: int model_config = { "extra": "allow", # Allow extra fields not defined in the model } class Model(BaseModel): """Represents a model available in the Dell Enterprise Hub.""" repo_name: str = Field(alias="repoName") description: str = "" license: str = "" creator_type: str = Field(default="", alias="creatorType") size: float = Field( default=0.0, description="Number of model parameters (in millions)" ) has_system_prompt: bool = Field(default=False, alias="hasSystemPrompt") is_multimodal: bool = Field(default=False, alias="isMultimodal") status: str = "" configs_deploy: Dict[str, List[ModelConfig]] = Field( default_factory=dict, alias="configsDeploy" ) class Config: """Pydantic model configuration. The 'populate_by_name' setting allows the model to be populated using either: 1. The Pythonic snake_case attribute names (e.g., repo_name, configs_deploy) 2. The original camelCase names from the API (e.g., repoName, configsDeploy) This provides compatibility with the API response format while maintaining Pythonic naming conventions in our codebase. """ populate_by_name = True # Classes for deployment snippet generation class SnippetRequest(BaseModel): """Request model for generating deployment snippets.""" model_id: str = Field( ..., description="Model ID in format 'organization/model_name'" ) platform_id: str = Field(..., description="Platform SKU ID") engine: str = Field(..., description="Deployment engine ('docker' or 'kubernetes')") num_gpus: int = Field(..., gt=0, description="Number of GPUs to use") num_replicas: int = Field(..., gt=0, description="Number of replicas to deploy") @field_validator("engine") @classmethod def validate_engine(cls, v): if v.lower() not in ["docker", "kubernetes"]: raise ValueError( f"Invalid engine: {v}. Valid types are: docker, kubernetes" ) return v.lower() class SnippetResponse(BaseModel): """Response model for deployment snippets.""" snippet: str = Field(..., description="The deployment snippet text") def list_models(client: "DellAIClient") -> List[str]: """ Get a list of all available model IDs. Args: client: The Dell AI client Returns: A list of model IDs in the format "organization/model_name" Raises: AuthenticationError: If authentication fails APIError: If the API returns an error """ response = client._make_request("GET", constants.MODELS_ENDPOINT) return response.get("models", []) def get_model(client: "DellAIClient", model_id: str) -> Model: """ Get detailed information about a specific model. Args: client: The Dell AI client model_id: The model ID in the format "organization/model_name" Returns: Detailed model information as a Model object Raises: ValidationError: If the model_id format is invalid ResourceNotFoundError: If the model is not found AuthenticationError: If authentication fails APIError: If the API returns an error """ # Validate model_id format if "/" not in model_id: raise ValidationError( "Invalid model ID format. 
def get_model(client: "DellAIClient", model_id: str) -> Model:
    """
    Get detailed information about a specific model.

    Args:
        client: The Dell AI client
        model_id: The model ID in the format "organization/model_name"

    Returns:
        Detailed model information as a Model object

    Raises:
        ValidationError: If the model_id format is invalid
        ResourceNotFoundError: If the model is not found
        AuthenticationError: If authentication fails
        APIError: If the API returns an error
    """
    # Validate model_id format
    if "/" not in model_id:
        raise ValidationError(
            "Invalid model ID format. Expected format: 'organization/model_name'",
            parameter="model_id",
        )

    try:
        endpoint = f"{constants.MODELS_ENDPOINT}/{model_id}"
        response = client._make_request("GET", endpoint)

        # Process configsDeploy to convert nested dictionaries to ModelConfig objects
        if "configsDeploy" in response and response["configsDeploy"]:
            for platform, configs in response["configsDeploy"].items():
                response["configsDeploy"][platform] = [
                    ModelConfig.model_validate(config) for config in configs
                ]

        # Create a Model object from the response
        return Model.model_validate(response)
    except ResourceNotFoundError:
        # Reraise with more specific information
        raise ResourceNotFoundError("model", model_id)


def _validate_request_schema(model_id, platform_id, engine, num_gpus, num_replicas):
    """
    Validate the basic schema of the request parameters.

    Args:
        model_id: The model ID
        platform_id: The platform SKU ID
        engine: The deployment engine
        num_gpus: Number of GPUs
        num_replicas: Number of replicas

    Raises:
        ValidationError: If the parameters don't match the expected schema
    """
    try:
        # Let Pydantic handle all validation
        _ = SnippetRequest(
            model_id=model_id,
            platform_id=platform_id,
            engine=engine,
            num_gpus=num_gpus,
            num_replicas=num_replicas,
        )
    except ValueError as e:
        # Simply convert to our custom ValidationError while preserving the original error.
        # This maintains a consistent error hierarchy without losing Pydantic's detailed info.
        raise ValidationError(str(e), original_error=e)


def _validate_model_id_format(model_id):
    """
    Validate that the model ID follows the expected format.

    Args:
        model_id: The model ID to validate

    Returns:
        tuple: (creator_name, model_name)

    Raises:
        ValidationError: If the model ID format is invalid
    """
    try:
        creator_name, model_name = model_id.split("/")
        return creator_name, model_name
    except ValueError:
        raise ValidationError(
            f"Invalid model_id format: {model_id}. "
            f"Expected format: 'organization/model_name'"
        )


def _validate_model_platform_compatibility(client, model_id, platform_id, num_gpus):
    """
    Validate that the model and platform combination is valid and the GPU
    configuration is supported.

    Args:
        client: The Dell AI client
        model_id: The model ID
        platform_id: The platform SKU ID
        num_gpus: The number of GPUs to use

    Raises:
        ValidationError: If the platform is not supported or the GPU configuration
            is invalid
        ResourceNotFoundError: If the model is not found
    """
    model = get_model(client, model_id)

    # Check if the platform is supported
    if platform_id not in model.configs_deploy:
        supported_platforms = list(model.configs_deploy.keys())
        platform_list = ", ".join(supported_platforms)
        raise ValidationError(
            f"Platform {platform_id} is not supported for model {model_id}. "
            f"Supported platforms: {platform_list}",
            parameter="platform_id",
            valid_values=supported_platforms,
        )

    # Validate the GPU configuration
    valid_configs = model.configs_deploy[platform_id]
    valid_gpus = {config.num_gpus for config in valid_configs}

    if num_gpus not in valid_gpus:
        gpu_list = ", ".join(str(g) for g in sorted(valid_gpus))
        raise ValidationError(
            f"Invalid number of GPUs ({num_gpus}) for model {model_id} "
            f"on platform {platform_id}. Valid GPU counts: {gpu_list}",
            parameter="num_gpus",
            valid_values=sorted(valid_gpus),
            config_details={
                "model_id": model_id,
                "platform_id": platform_id,
                "valid_configs": valid_configs,
            },
        )

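
# Illustrative shape of the data _validate_model_platform_compatibility walks.
# The platform SKU and token limits below are made-up placeholders, not values
# returned by the real API:
#
#     configs_deploy = {
#         "example-platform-sku": [                        # hypothetical SKU
#             ModelConfig(num_gpus=1, max_total_tokens=4096),
#             ModelConfig(num_gpus=2, max_total_tokens=8192),
#         ],
#     }
#
# With this data, num_gpus=4 fails validation and the error reports the valid
# counts "1, 2"; an unknown platform_id fails earlier, listing the supported SKUs.
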
def _handle_resource_not_found(client, e, model_id, platform_id, num_gpus):
    """
    Handle ResourceNotFoundError by providing more specific error messages.

    Args:
        client: The Dell AI client
        e: The original ResourceNotFoundError
        model_id: The model ID
        platform_id: The platform SKU ID
        num_gpus: The number of GPUs

    Raises:
        ResourceNotFoundError: With a more specific error message
        ValidationError: If the configuration is invalid
    """
    # If the error is about the model, provide a specific error
    if e.resource_type.lower() == "models":
        raise ResourceNotFoundError("model", model_id)

    # If we can get the model details, check if this might be a configuration issue
    try:
        model = get_model(client, model_id)

        # Check if platform is valid but GPU config is invalid
        if platform_id in model.configs_deploy:
            valid_configs = model.configs_deploy[platform_id]
            valid_gpus = {config.num_gpus for config in valid_configs}

            if num_gpus not in valid_gpus:
                gpu_list = ", ".join(str(g) for g in sorted(valid_gpus))
                raise ValidationError(
                    f"Invalid number of GPUs ({num_gpus}) for model {model_id} "
                    f"on platform {platform_id}. Valid GPU counts: {gpu_list}",
                    parameter="num_gpus",
                    valid_values=sorted(valid_gpus),
                )
    except ResourceNotFoundError:
        # The model truly doesn't exist
        raise ResourceNotFoundError("model", model_id)

    # If we couldn't determine a more specific cause, re-raise the original error
    raise e


def get_deployment_snippet(
    client: "DellAIClient",
    model_id: str,
    platform_id: str,
    engine: str,
    num_gpus: int,
    num_replicas: int,
) -> str:
    """
    Get a deployment snippet for the specified model and configuration.

    Args:
        client: The Dell AI client
        model_id: The model ID in the format "organization/model_name"
        platform_id: The platform SKU ID
        engine: The deployment engine ("docker" or "kubernetes")
        num_gpus: The number of GPUs to use
        num_replicas: The number of replicas to deploy

    Returns:
        A string containing the deployment snippet (docker command or k8s manifest)

    Raises:
        ValidationError: If any of the input parameters are invalid
        ResourceNotFoundError: If the model, platform, or configuration is not found
        GatedRepoAccessError: If the model repository is gated and the user doesn't
            have access
    """
    # Step 1: Validate basic request parameters
    _validate_request_schema(model_id, platform_id, engine, num_gpus, num_replicas)

    # Step 2: Parse and validate model ID format
    creator_name, model_name = _validate_model_id_format(model_id)

    # Step 3: Check if the user has access to the model repository.
    # This raises GatedRepoAccessError if the model is gated and the user lacks access.
    client.check_model_access(model_id)

    # Step 4: Validate model and platform compatibility if the model exists
    try:
        _validate_model_platform_compatibility(client, model_id, platform_id, num_gpus)
    except ResourceNotFoundError:
        # We'll handle this during the API request
        pass

    # Step 5: Build API path and query parameters
    path = f"{constants.SNIPPETS_ENDPOINT}/models/{creator_name}/{model_name}/deploy"
    params = {
        "sku": platform_id,  # API still expects "sku" as the parameter name
        "container": engine,
        "replicas": num_replicas,
        "gpus": num_gpus,
    }

    # Step 6: Make API request and handle errors
    try:
        response = client._make_request("GET", path, params=params)
        return SnippetResponse(snippet=response.get("snippet", "")).snippet
    except ResourceNotFoundError as e:
        _handle_resource_not_found(client, e, model_id, platform_id, num_gpus)
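

# End-to-end sketch for get_deployment_snippet (the model and platform IDs are
# illustrative placeholders only; substitute real values obtained from
# list_models and the platforms API):
#
#     snippet = get_deployment_snippet(
#         client,
#         model_id="organization/model_name",  # placeholder model ID
#         platform_id="example-platform-sku",  # placeholder platform SKU
#         engine="docker",                     # or "kubernetes"
#         num_gpus=1,
#         num_replicas=1,
#     )
#     print(snippet)  # docker command or k8s manifest, depending on engine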