def generate_responses()

in ml/eval/generate.py


from tqdm import tqdm


def generate_responses(model, tokenizer, dataset, num_examples=None):
    """Generate responses for a dataset using a given model and tokenizer."""
    results = []

    # Limit the dataset to num_examples if specified
    items = list(dataset.data.items())
    if num_examples is not None:
        items = items[:num_examples]

    for _, example in tqdm(items):
        # Render the example's chat messages into the model's prompt format
        prompt = tokenizer.apply_chat_template(example.prompt, tokenize=False)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        output_ids = model.generate(**inputs, max_new_tokens=4000)
        # Decode the full sequence (prompt plus completion)
        output = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

        # Keys follow the AlpacaEval format
        results.append({
            "instruction": prompt,
            "output": output,
        })
    return results
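

For context, here is a minimal usage sketch. It assumes a Hugging Face transformers chat model; Example, EvalDataset, and the checkpoint name are hypothetical stand-ins for whatever dataset type the evaluation pipeline actually passes in (anything exposing .data as a dict of objects with a chat-format .prompt attribute works):


from dataclasses import dataclass

from transformers import AutoModelForCausalLM, AutoTokenizer


@dataclass
class Example:
    # Hypothetical stand-in: a chat-format message list, e.g.
    # [{"role": "user", "content": "..."}]
    prompt: list


@dataclass
class EvalDataset:
    # Hypothetical stand-in: maps example keys to Example objects
    data: dict


dataset = EvalDataset(data={
    "ex-0": Example(prompt=[{"role": "user", "content": "What is 2 + 2?"}]),
})

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # hypothetical checkpoint choice
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

results = generate_responses(model, tokenizer, dataset, num_examples=1)
print(results[0]["output"])


The returned list of {"instruction": ..., "output": ...} dicts matches the AlpacaEval format noted in the code, so it can be dumped to JSON for downstream scoring.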