in src/lighteval/models/litellm_model.py [0:0]
def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, stop_sequence):  # noqa: C901
    """Make an API call via ``litellm.completion`` with exponential-backoff retries.

    Args:
        prompt: Chat-formatted messages forwarded to ``litellm.completion``.
        return_logits: Whether to request logprobs; only honored when
            ``self.provider == "openai"`` (a warning is logged otherwise).
        max_new_tokens: Generation budget, normalized via
            ``self._prepare_max_new_tokens``.
        num_samples: Number of completions to request (``n``).
        stop_sequence: Stop sequences, normalized via
            ``self._prepare_stop_sequence``.

    Returns:
        The ``litellm`` completion response on success, or an empty
        ``LitellmModelResponse`` when the prompt was content-filtered or all
        ``self.API_MAX_RETRY`` attempts failed.

    Raises:
        ValueError: If ``num_samples > 1`` while temperature is 0, which would
            produce identical samples.
    """
    # Fail fast on an invalid sampling configuration: retrying cannot fix it,
    # so it must not be swallowed by the broad retry handler below.
    if num_samples > 1 and self.generation_parameters.temperature == 0:
        raise ValueError(
            "num_samples > 1 but temperature is set to 0, this will not sample different outputs."
        )

    # These normalizations are attempt-independent; hoist them out of the
    # retry loop so they run (and the warning logs) exactly once.
    stop_sequence = self._prepare_stop_sequence(stop_sequence)
    max_new_tokens = self._prepare_max_new_tokens(max_new_tokens)

    if return_logits and self.provider != "openai":
        logger.warning("Returning logits is not supported for this provider, ignoring.")

    # NOTE(review): stop_sequence is prepared but never forwarded to
    # litellm.completion here — confirm whether it should be added to kwargs.
    response = LitellmModelResponse()
    for attempt in range(self.API_MAX_RETRY):
        try:
            # Prepare kwargs for completion call
            kwargs = {
                "model": self.model,
                "messages": prompt,
                "logprobs": return_logits if self.provider == "openai" else None,
                "base_url": self.base_url,
                "n": num_samples,
                "caching": True,
                "api_key": self.api_key,
            }
            if "o1" in self.model:
                logger.warning("O1 models do not support temperature, top_p, stop sequence. Disabling.")
            else:
                kwargs.update(self.generation_parameters.to_litellm_dict())

            # Respect an explicit max_completion_tokens from the generation
            # parameters; otherwise fall back to the prepared budget.
            if kwargs.get("max_completion_tokens", None) is None:
                kwargs["max_completion_tokens"] = max_new_tokens

            response = litellm.completion(**kwargs)

            # If response is empty, retry without caching (maybe the error is
            # recoverable and solved with a retry)
            if response.choices[0].message.content is None:
                kwargs["caching"] = False
                logger.info("Response is empty, retrying without caching")
                response = litellm.completion(**kwargs)
            return response
        except litellm.BadRequestError as e:
            # Content-policy filtering is not retryable: return an empty
            # response instead of burning the remaining retry budget.
            if "message" in e.__dict__:
                error_string = (
                    "The response was filtered due to the prompt triggering Microsoft's content management policy"
                )
                if error_string in e.__dict__["message"]:
                    logger.warning(f"{error_string}. Returning empty response.")
                    return LitellmModelResponse()
        except Exception as e:
            wait_time = min(64, self.API_RETRY_SLEEP * (2**attempt))  # Exponential backoff with max 64s
            logger.warning(
                f"Error in API call: {e}, waiting {wait_time} seconds before retry {attempt + 1}/{self.API_MAX_RETRY}"
            )
            time.sleep(wait_time)
    logger.error(f"API call failed after {self.API_MAX_RETRY} attempts, returning empty response.")
    return LitellmModelResponse()