# inference_helper() — from src/alpaca_eval/decoders/huggingface_api.py


def inference_helper(prompt: str, inference, params, n_retries=100, waiting_time=2) -> str:
    """Call `inference(prompt=..., **params)` with retry handling.

    Retries up to `n_retries` times on rate-limit errors (sleeping
    `waiting_time` seconds between attempts) and on `max_new_tokens`
    validation errors (shrinking `params["max_new_tokens"]` by 20% each
    time). Any other error, or exhausting the retries, raises ValueError.

    Args:
        prompt: the prompt string forwarded to `inference`.
        inference: callable invoked as `inference(prompt=prompt, **params)`.
        params: keyword arguments for `inference`; `params["max_new_tokens"]`
            may be mutated in place when the backend rejects it as too large.
        n_retries: maximum number of attempts.
        waiting_time: seconds to sleep after a rate-limit error.

    Returns:
        The output of the first successful `inference` call.

    Raises:
        ValueError: on an unrecoverable error, when `max_new_tokens` is
            reduced to 0, or when all `n_retries` attempts fail.
    """
    last_error = None  # keep the exception past the `except` block's scope
    for _ in range(n_retries):
        try:
            # TODO: check why doesn't stop after </s>
            output = inference(prompt=prompt, **params)
        except Exception as error:
            last_error = error
            # membership tests must run on the string form of the exception,
            # not the exception object itself (which would raise TypeError)
            message = str(error)
            if "Rate limit reached" in message:
                logging.warning(f"Rate limit reached... Trying again in {waiting_time} seconds.")
                time.sleep(waiting_time)
            elif "Input validation error" in message and "max_new_tokens" in message:
                params["max_new_tokens"] = int(params["max_new_tokens"] * 0.8)
                logging.warning(
                    f"`max_new_tokens` too large. Reducing target length to {params['max_new_tokens']}, "
                    f"Retrying..."
                )
                if params["max_new_tokens"] == 0:
                    raise ValueError(f"Error in inference. Full error: {error}")
            else:
                raise ValueError(f"Error in inference. Full error: {error}")
        else:
            # return only on success; a handled exception falls through to
            # the next loop iteration (the original returned an unbound name)
            return output
    # all attempts failed (this was unreachable in the original)
    raise ValueError(f"Error in inference. We tried {n_retries} times and failed. Full error: {last_error}")