def convert_generation_config()

in src/optimum/nvidia/runtime.py [0:0]


def _last_token_id(token_id: "int | list | None") -> "int | None":
    """Collapse a token-id setting into the single value handed to SamplingParams.

    A list is reduced to its last element; any other value is returned as-is.
    """
    if isinstance(token_id, list):
        # An empty list carries no id, so treat it like an unset value
        # instead of letting ``[][-1]`` raise IndexError.
        return token_id[-1] if token_id else None
    return token_id


def _positive_or_one(value: "int | None") -> int:
    """Return ``value`` when it is greater than zero, otherwise 1."""
    # ``None`` has to be ruled out first: ``None > 0`` raises TypeError.
    if value is not None and value > 0:
        return value
    return 1


def convert_generation_config(config: "GenerationConfig") -> "SamplingParams":
    """Build a ``SamplingParams`` from the fields of a ``GenerationConfig``.

    Args:
        config: Generation settings to translate. The attributes read are
            ``eos_token_id``, ``pad_token_id``, ``do_sample``, ``top_k``,
            ``top_p``, ``temperature``, ``num_beams``, ``bad_words_ids``,
            ``length_penalty``, ``repetition_penalty``,
            ``no_repeat_ngram_size``, ``min_length``, ``max_new_tokens``,
            ``output_logits`` and ``renormalize_logits``.

    Returns:
        A ``SamplingParams`` populated from ``config``:

        * list-valued ``eos_token_id`` / ``pad_token_id`` are reduced to their
          last element (an empty list becomes ``None``);
        * when ``do_sample`` is falsy, ``top_k`` and ``beam_width`` are 1;
        * ``no_repeat_ngram_size`` and ``min_tokens`` fall back to 1 when the
          source value is ``None``, zero or negative;
        * ``max_tokens`` falls back to 32 when ``max_new_tokens`` is falsy.
    """
    sampling_enabled = config.do_sample

    return SamplingParams(
        end_id=_last_token_id(config.eos_token_id),
        pad_id=_last_token_id(config.pad_token_id),
        top_k=config.top_k if sampling_enabled else 1,
        top_p=config.top_p,
        temperature=config.temperature,
        # NOTE(review): num_beams is only forwarded when do_sample is set, so a
        # non-sampling config always runs with a single beam - confirm this is
        # intended.
        beam_width=config.num_beams if sampling_enabled else 1,
        bad_token_ids=config.bad_words_ids,
        length_penalty=config.length_penalty,
        repetition_penalty=config.repetition_penalty,
        no_repeat_ngram_size=_positive_or_one(config.no_repeat_ngram_size),
        min_tokens=_positive_or_one(config.min_length),
        max_tokens=config.max_new_tokens or 32,  # SamplingParams::max_tokens' default
        return_generation_logits=config.output_logits,
        # Log-probs are requested exactly when renormalize_logits is falsy.
        return_log_probs=not config.renormalize_logits,
    )