# trending_deploy/constants.py

"""Constants and data models for trending-model deployment.

Defines the ``Instance`` and ``Model`` dataclasses plus the task lists and
the AWS instance catalogue used when choosing deployment hardware.
"""

from dataclasses import asdict, dataclass

from huggingface_hub import ModelInfo


@dataclass
class Instance:
    """A hardware instance offering with its memory capacity and hourly price."""

    # Human-readable description, e.g. "Intel Sapphire Rapids, 1 vCPU, 2GB".
    description: str
    # Memory available on the instance, in bytes.
    memory_usage_bytes: int
    # On-demand price per hour (currency not stated here; presumably USD — verify).
    hourly_rate: float


@dataclass
class Model:
    """A Hub model paired with an instance that can host it."""

    # Metadata for the model as returned by the Hugging Face Hub.
    model_info: ModelInfo
    # Instance selected to run this model.
    viable_instance: Instance
    # NOTE(review): reward/cost look like they are filled in later by a
    # selection/optimization step — confirm against callers.
    reward: float | None = None
    cost: float | None = None

    def to_dict(self) -> dict:
        """Return a plain-dict representation (nested dataclasses are recursed)."""
        return asdict(self)


# Based on https://github.com/huggingface/hub-docs/tree/main/docs/inference-providers/tasks
ALL_TASKS = [
    # Audio
    "audio-classification",
    "automatic-speech-recognition",
    # Image
    "image-classification",
    "image-segmentation",
    "image-to-image",
    "object-detection",
    "text-to-image",
    # Video
    "text-to-video",
    # Text
    "feature-extraction",
    "fill-mask",
    "sentence-similarity",
    "question-answering",
    "summarization",
    "text-classification",
    "text-generation",
    "text-ranking",
    "token-classification",
    "translation",
    "zero-shot-classification",
    # Table
    "table-question-answering",
]

# Tasks that require GPU hardware and are therefore not deployed by default.
GPU_ONLY_TASKS = [
    "image-to-image",
    "text-to-image",
    "text-to-video",
    "text-generation",
    "audio-classification",
    "automatic-speech-recognition",
    "object-detection",
]

# Tasks recognized upstream but not yet supported here.
NOT_IMPLEMENTED_TASKS = [
    "text-ranking",
]

# Precompute the exclusion set once: O(1) membership instead of rebuilding
# the concatenated list for every task in the comprehension below.
_EXCLUDED_TASKS = set(GPU_ONLY_TASKS) | set(NOT_IMPLEMENTED_TASKS)

# Tasks deployed by default; preserves the ordering of ALL_TASKS.
DEFAULT_TASKS = [task for task in ALL_TASKS if task not in _EXCLUDED_TASKS]

# One gibibyte in bytes.
_GIB = 1024 ** 3

# Just AWS for now
INSTANCES = [
    Instance(description="Intel Sapphire Rapids, 1 vCPU, 2GB", memory_usage_bytes=2 * _GIB, hourly_rate=0.033),
    Instance(description="Intel Sapphire Rapids, 2 vCPU, 4GB", memory_usage_bytes=4 * _GIB, hourly_rate=0.067),
    Instance(description="Intel Sapphire Rapids, 4 vCPU, 8GB", memory_usage_bytes=8 * _GIB, hourly_rate=0.134),
    Instance(description="Intel Sapphire Rapids, 8 vCPU, 16GB", memory_usage_bytes=16 * _GIB, hourly_rate=0.268),
    Instance(description="Intel Sapphire Rapids, 16 vCPU, 32GB", memory_usage_bytes=32 * _GIB, hourly_rate=0.536),
]

# Mapping from memory capacity (bytes) -> Instance, keyed in ascending order
# (dicts preserve insertion order, and we insert from a sorted list).
MEMORY_USAGE_TO_INSTANCE = {
    instance.memory_usage_bytes: instance
    for instance in sorted(INSTANCES, key=lambda inst: inst.memory_usage_bytes)
}