in src/alpaca_eval/decoders/huggingface_api.py [0:0]
def inference_helper(prompt: str, inference, params, n_retries=100, waiting_time=2) -> str:
    """Call the HuggingFace `inference` client with retry logic.

    Retries up to `n_retries` times on two recoverable errors:
    - rate limiting (sleeps `waiting_time` seconds, then retries unchanged);
    - `max_new_tokens` too large (shrinks it by 20% and retries).
    Any other exception, or shrinking `max_new_tokens` down to 0, aborts
    immediately.

    Args:
        prompt: the prompt to send to the model.
        inference: callable invoked as `inference(prompt=..., **params)`.
        params: generation kwargs; `params["max_new_tokens"]` may be
            mutated (reduced) in place on validation errors.
        n_retries: maximum number of attempts.
        waiting_time: seconds to sleep after a rate-limit error.

    Returns:
        The inference output from the first successful call.

    Raises:
        ValueError: on an unrecoverable error or once all retries are spent.
    """
    error = None
    for _ in range(n_retries):
        try:
            # TODO: check why doesn't stop after </s>
            # Return immediately on success — do not keep looping.
            return inference(prompt=prompt, **params)
        except Exception as err:
            error = err
            # Exceptions don't support `in`; match against the message text.
            message = str(error)
            if "Rate limit reached" in message:
                logging.warning(f"Rate limit reached... Trying again in {waiting_time} seconds.")
                time.sleep(waiting_time)
            elif "Input validation error" in message and "max_new_tokens" in message:
                params["max_new_tokens"] = int(params["max_new_tokens"] * 0.8)
                logging.warning(
                    f"`max_new_tokens` too large. Reducing target length to {params['max_new_tokens']}, "
                    f"Retrying..."
                )
                if params["max_new_tokens"] == 0:
                    raise ValueError(f"Error in inference. Full error: {error}")
            else:
                # Unrecoverable error: fail fast instead of burning retries.
                raise ValueError(f"Error in inference. Full error: {error}")
    # All retries exhausted without a successful call.
    raise ValueError(f"Error in inference. We tried {n_retries} times and failed. Full error: {error}")