in ml/eval/generate.py
from tqdm import tqdm


def generate_responses(model, tokenizer, dataset, num_examples=None):
    """Generate responses for a dataset using a given model and tokenizer."""
    results = []
    # Limit the dataset to num_examples if specified.
    items = list(dataset.data.items())
    if num_examples is not None:
        items = items[:num_examples]
    for instruction, example in tqdm(items):
        # Render the chat messages into the model's prompt format, appending
        # the assistant header so the model answers rather than continuing
        # the user turn.
        chat_prompt = tokenizer.apply_chat_template(
            example.prompt, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
        output_ids = model.generate(**inputs, max_new_tokens=4000)
        # Decode only the newly generated tokens, not the echoed prompt.
        prompt_len = inputs["input_ids"].shape[1]
        output = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
        # Keys follow the AlpacaEval schema: the raw instruction plus the
        # model's response.
        results.append({
            "instruction": instruction,
            "output": output,
        })
    return results
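
# --- Usage sketch (illustrative, not from the source file) ---
# A minimal example of calling generate_responses. The dataset classes below
# are hypothetical stand-ins: the real dataset type is not shown here, only
# that `.data` maps an instruction string to an object with a `.prompt` field
# holding chat messages. The model name is likewise an arbitrary choice; any
# chat-tuned Hugging Face model with a chat template should work.
from dataclasses import dataclass, field

from transformers import AutoModelForCausalLM, AutoTokenizer


@dataclass
class Example:
    # Chat messages in the [{"role": ..., "content": ...}] format that
    # tokenizer.apply_chat_template expects.
    prompt: list


@dataclass
class ToyDataset:
    data: dict = field(default_factory=dict)


if __name__ == "__main__":
    question = "What is 2 + 2?"
    dataset = ToyDataset(data={
        question: Example(prompt=[{"role": "user", "content": question}]),
    })

    model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative choice
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

    results = generate_responses(model, tokenizer, dataset, num_examples=1)
    print(results[0]["output"])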