in preprocess/fewshot_gym_dataset.py [0:0]
def save(self, path, k, seed, k_shot_train, k_shot_dev, k_shot_test):
    """Convert the sampled k-shot splits and write them as ``.jsonl`` files.

    The conversion mode is selected by flags defined outside this method
    (``do_train``, ``use_instruct``, ``do_test``):

    * ``do_train and use_instruct``: each training example is passed through
      ``apply_prompt``, which must return a dict whose keys all have the
      form ``"inst:<hf_identifier>:<prompt name>"``. The values are
      JSON-encoded, grouped by key, and each group is written as a train
      file in a directory named after its key. Dev/test data are not saved.
    * ``use_instruct`` only: all three splits are passed through
      ``apply_prompt`` and written under ``"inst:" + self.hf_identifier``.
    * otherwise: the train split (and dev/test when ``do_test`` is set) is
      passed through ``preprocess`` with ``config_dict[self.hf_identifier]``
      and written under ``self.hf_identifier``.

    Output files are named
    ``<path>/<identifier>/<identifier>_<k>_<seed>_<split>.jsonl``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been stripped -- confirm that the dev/test handling in
    the plain branch is meant to be gated by ``do_test``.

    Args:
        path: Output root directory. When falsy, the examples are still
            converted but nothing is written to disk.
        k: Embedded in the output file names (presumably the number of
            shots -- confirm against the caller).
        seed: Embedded in the output file names.
        k_shot_train: Iterable of training examples.
        k_shot_dev: Iterable of dev examples.
        k_shot_test: Iterable of test examples.
    """

    def _apply_prompt(example):
        return apply_prompt(
            self.hf_identifier,
            example,
            do_train=do_train,
            prompt_names_per_task=prompt_names_per_task,
            prompt_dict=prompt_dict,
        )

    def _output_prefix(identifier):
        # Make sure <path>/<identifier>/ exists and return the file-name
        # prefix shared by every split written for that identifier.
        os.makedirs(os.path.join(path, identifier), exist_ok=True)
        return os.path.join(path, identifier,
                            "{}_{}_{}".format(identifier, k, seed))

    if do_train and use_instruct:
        # Only the training split is saved in this mode.
        grouped_k_shot_train = defaultdict(list)
        for example in tqdm(k_shot_train):
            prompted = _apply_prompt(example)
            assert isinstance(prompted, dict)
            # Every returned key must correspond to a prompt registered for
            # this task.
            allowed_keys = {
                "inst:" + self.hf_identifier + ":" + name
                for name in prompt_names_per_task[self.hf_identifier]
            }
            assert set(prompted) <= allowed_keys
            for key, value in prompted.items():
                grouped_k_shot_train[key].append(json.dumps(value))

        if path:
            # Each key acts as its own dataset identifier on disk.
            for key, lines in grouped_k_shot_train.items():
                self.write(lines, _output_prefix(key) + "_train.jsonl")

    elif use_instruct:
        k_shot_train = [_apply_prompt(example) for example in k_shot_train]
        k_shot_dev = [_apply_prompt(example) for example in k_shot_dev]
        k_shot_test = [_apply_prompt(example) for example in k_shot_test]

        # use_instruct is necessarily truthy in this branch, so the
        # identifier always carries the "inst:" prefix.
        hf_identifier = "inst:" + self.hf_identifier
        if path:
            prefix = _output_prefix(hf_identifier)
            self.write(k_shot_train, prefix + "_train.jsonl")
            self.write(k_shot_dev, prefix + "_dev.jsonl")
            self.write(k_shot_test, prefix + "_test.jsonl")

    else:
        config = config_dict[self.hf_identifier]
        k_shot_train = [preprocess(self.hf_identifier, example, config)
                        for example in k_shot_train]
        if do_test:
            k_shot_dev = [preprocess(self.hf_identifier, example, config)
                          for example in k_shot_dev]
            k_shot_test = [preprocess(self.hf_identifier, example, config)
                           for example in k_shot_test]

        if path:
            prefix = _output_prefix(self.hf_identifier)
            self.write(k_shot_train, prefix + "_train.jsonl")
            if do_test:
                self.write(k_shot_dev, prefix + "_dev.jsonl")
                self.write(k_shot_test, prefix + "_test.jsonl")