in src/lighteval/metrics/llm_as_judge.py [0:0]
def __call_litellm(self, prompts):
    import litellm

    def __call_api(prompt):
        error_message = "ERROR: Failed to get response from the API."
        for _ in range(self.API_MAX_RETRY):
            try:
                # Reasoning models (o1/o3/R1) need a much larger completion budget.
                max_new_tokens = 512
                if "o1" in self.model or "o3" in self.model or "R1" in self.model:
                    max_new_tokens = min(max_new_tokens * 10, 32000)

                kwargs = {
                    "model": self.model,
                    "messages": prompt,
                    "max_tokens": max_new_tokens,
                    "n": 1,
                    "caching": True,
                }
                response = litellm.completion(**kwargs)
                text = response.choices[0].message.content
                if not text or text == error_message:
                    # An empty (or previously failed) cached answer is useless:
                    # bypass the cache and call the API once more.
                    kwargs["caching"] = False
                    response = litellm.completion(**kwargs)
                    text = response.choices[0].message.content
                    if not text or text == error_message:
                        # Just return an error response if the second attempt fails too
                        logger.error(f"Failed to get response from the API for prompt: {prompt}")
                        return error_message
                return text
            except Exception as e:
                logger.warning(f"{type(e), e}")
                time.sleep(self.API_RETRY_SLEEP)
        # All retries exhausted.
        return error_message

    results = []
    # Fan the judge calls out over a thread pool; tqdm tracks completion.
    with ThreadPoolExecutor(100) as executor:
        for entry in tqdm(executor.map(__call_api, prompts), total=len(prompts)):
            results.append(entry)

    if None in results:
        raise ValueError("Some entries are not annotated due to errors in annotate_p, please inspect and retry.")

    return results
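
For context, the per-prompt retry-plus-cache-bypass call and the thread-pool fan-out can be exercised outside the class. The sketch below is a minimal approximation, not the library's code: JUDGE_MODEL, MAX_RETRY, and RETRY_SLEEP are illustrative stand-ins for the instance attributes (self.model, self.API_MAX_RETRY, self.API_RETRY_SLEEP), and it assumes litellm's cache has been configured so the caching flag has an effect.

# Minimal sketch (illustrative placeholders, not the original module's names).
import time
from concurrent.futures import ThreadPoolExecutor

import litellm
from tqdm import tqdm

JUDGE_MODEL = "gpt-4o-mini"  # assumption: any litellm-routable model id
MAX_RETRY = 3
RETRY_SLEEP = 10  # seconds between retries
ERROR_MESSAGE = "ERROR: Failed to get response from the API."


def call_judge(prompt):
    """Call the judge model for one chat-formatted prompt, retrying on errors."""
    for _ in range(MAX_RETRY):
        try:
            kwargs = {
                "model": JUDGE_MODEL,
                "messages": prompt,
                "max_tokens": 512,
                "n": 1,
                "caching": True,  # first attempt may be served from litellm's cache
            }
            text = litellm.completion(**kwargs).choices[0].message.content
            if not text or text == ERROR_MESSAGE:
                # Bypass the cache once if the cached answer is empty or an error.
                kwargs["caching"] = False
                text = litellm.completion(**kwargs).choices[0].message.content
            if text and text != ERROR_MESSAGE:
                return text
            return ERROR_MESSAGE
        except Exception:
            time.sleep(RETRY_SLEEP)
    return ERROR_MESSAGE


if __name__ == "__main__":
    prompts = [
        [{"role": "user", "content": f"Rate answer #{i} from 1 to 10."}]
        for i in range(4)
    ]
    # Same fan-out pattern as the method above: one thread per in-flight judge call.
    with ThreadPoolExecutor(8) as executor:
        results = list(tqdm(executor.map(call_judge, prompts), total=len(prompts)))
    print(results)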