in src/lighteval/models/endpoints/inference_providers_model.py [0:0]
def __init__(self, config: InferenceProvidersModelConfig) -> None:
    """Initialize the inference client.

    Sets up model metadata, retry/concurrency parameters, the async HF
    inference client, and a best-effort tokenizer used for prompt building.

    Args:
        config: Configuration object containing model and provider settings
            (model name, provider, generation parameters, concurrency limit,
            timeout, proxies, billing org, and system prompt).
    """
    self.model_info = ModelInfo(
        model_name=config.model_name,
        model_sha="",
        model_dtype=None,
        model_size=-1,
    )
    self.model_name = config.model_name
    self.provider = config.provider
    self.generation_parameters = config.generation_parameters
    # Retry policy for API calls: up to 5 attempts, exponential backoff
    # starting at 3s with a 2x multiplier.
    self.API_MAX_RETRY = 5
    self.API_RETRY_SLEEP = 3
    self.API_RETRY_MULTIPLIER = 2
    self.pairwise_tokenization = False
    self.semaphore = asyncio.Semaphore(config.parallel_calls_count)  # Limit concurrent API calls
    self.client = AsyncInferenceClient(
        provider=self.provider,
        timeout=config.timeout,
        proxies=config.proxies,
        bill_to=config.org_to_bill,
    )
    try:
        self._tokenizer = AutoTokenizer.from_pretrained(self.model_name)
    except (HfHubHTTPError, OSError):
        # AutoTokenizer.from_pretrained raises OSError (not HfHubHTTPError) for
        # the common failure modes — unknown repo id, gated repo without auth,
        # or a repo that ships no tokenizer files. Catch both so we always fall
        # back to tokenizer-free prompting instead of crashing on init.
        logger.warning(f"Could not load model's tokenizer for the model {self.model_name}.")
        self._tokenizer = None
    # NOTE(review): PromptManager reads `self.tokenizer` while this method sets
    # `self._tokenizer` — presumably a property defined elsewhere in the class;
    # verify before renaming either.
    self.prompt_manager = PromptManager(
        use_chat_template=True, tokenizer=self.tokenizer, system_prompt=config.system_prompt
    )