# glan-instruct/generate_answer_only.py
import argparse
import jsonlines
from glan import generate_answers, read_jsonl
# Marker text identifying an answer that should not be kept as an instruction.
UNKNOWN_ANSWER_MARKER = "DO NOT KNOW"


def is_usable_answer(answer):
    """Return True when *answer* should be kept as an instruction answer.

    An answer is rejected when it is not a string, is empty/whitespace-only,
    or contains the ``DO NOT KNOW`` marker anywhere in its text.

    Args:
        answer: The value produced for one question.

    Returns:
        bool: True if the answer is a non-blank string without the marker.
    """
    if not isinstance(answer, str) or not answer.strip():
        return False
    # The marker must be searched for INSIDE the answer. The reverse test
    # (`answer not in marker`) would keep "I DO NOT KNOW." and would drop
    # short legitimate answers such as "NO" or "DO".
    return UNKNOWN_ANSWER_MARKER not in answer


def build_output_path(questions_filepath):
    """Derive the output path for the answered instructions.

    "Questions" in the path is replaced with "Instructions". When the path
    does not contain "Questions" the replacement would be a no-op and the
    input file would be overwritten, so "_with_answers" is inserted before
    the file extension instead.

    Args:
        questions_filepath: Path of the input questions JSONL file.

    Returns:
        str: A path that is always different from ``questions_filepath``.
    """
    import os

    output_path = questions_filepath.replace("Questions", "Instructions")
    if output_path == questions_filepath:
        root, ext = os.path.splitext(questions_filepath)
        output_path = root + "_with_answers" + ext
    return output_path


def main():
    """Read questions, generate answers and write the answered records.

    Parses the command-line options, loads the questions file, asks the
    configured model for answers, drops records without a usable answer and
    writes the remaining records (with an added "answer" key) as JSONL.
    """
    parser = argparse.ArgumentParser(description='Options')
    parser.add_argument("--questions_filepath", type=str, default="samples/GLAN_Instructions_Korean_20_Samples_3b7e.jsonl")
    parser.add_argument("--model_name_for_answer", type=str, default="gpt-4o")
    parser.add_argument("--answer_max_tokens", type=int, default=2048)
    parser.add_argument("--answer_batch_size", type=int, default=5)
    args = parser.parse_args()

    questions_filepath = args.questions_filepath
    all_questions = read_jsonl(questions_filepath)
    all_answers = generate_answers(
        all_questions,
        model_name=args.model_name_for_answer,
        max_tokens=args.answer_max_tokens,
        batch_size=args.answer_batch_size
    )

    # NOTE(review): assumes generate_answers returns one answer per question,
    # in order, and that each question is a dict -- confirm against glan.py.
    # zip() silently stops at the shorter of the two sequences.
    instructions = [
        # Build a new record rather than mutating the loaded question.
        {**question, "answer": answer}
        for question, answer in zip(all_questions, all_answers)
        if is_usable_answer(answer)
    ]

    output_path = build_output_path(questions_filepath)
    with jsonlines.open(output_path, mode='w') as writer:
        for instruction in instructions:
            writer.write(instruction)

    print("Saved " + str(len(instructions)) + " instructions to " + output_path)


if __name__ == "__main__":
    main()