in scripts/adapet/ADAPET/setfit_adapet.py [0:0]
def jsonl_from_dataset(dataset, task_name, updated_args, split="train"):
    """Write one split of ``dataset`` as a JSON Lines file in ADAPET format.

    The file is created at ``<updated_args.data_dir>/<split>.jsonl`` and any
    previous file at that path is replaced. One JSON object is written per
    example:

    * tasks in ``GLUE_DATASETS``: ``{"TEXT1": ..., "TEXT2": ..., "LBL": ...}``
      built from the ``"text1"``, ``"text2"`` and ``"label"`` columns;
    * tasks in ``AMZ_MULTI_LING`` or ``SINGLE_SENT_DATASETS``:
      ``{"TEXT1": ..., "LBL": ...}`` built from the ``"text"`` and ``"label"``
      columns.

    Labels are always serialized via ``str()``. A task that belongs to none
    of the three groups yields an empty file.

    Args:
        dataset: Mapping-like object indexed by column name; each column is
            iterated in order. Columns are assumed to have equal length.
        task_name: Task identifier used to select the record layout.
        updated_args: Object exposing a ``data_dir`` attribute (output
            directory, created if missing).
        split: Split name, used as the file stem and in status messages.

    Raises:
        KeyError: If a required column is missing (for mapping inputs).
        OSError: If the directory or the output file cannot be created.
    """
    is_pair_task = task_name in GLUE_DATASETS
    if is_pair_task:
        text1 = dataset["text1"]
        text2 = dataset["text2"]
    else:
        text = dataset["text"]
    label = dataset["label"]

    data_dir = updated_args.data_dir
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(data_dir, exist_ok=True)
    writefile = os.path.join(data_dir, split + ".jsonl")

    # Best-effort removal, kept for its status messages; the "w" mode below
    # guarantees a fresh file even if the removal failed for another reason.
    try:
        os.remove(writefile)
        print("removing old {} file".format(split))
    except OSError:
        print("no old {} files found".format(split))

    print("writing new {} file".format(split))
    # Explicit UTF-8: records written with ensure_ascii=False may contain
    # non-ASCII text, which must not depend on the platform's locale encoding.
    with open(writefile, "w", encoding="utf-8") as f:
        if is_pair_task:
            for txt1, txt2, lab in zip(text1, text2, label):
                json_dict = {"TEXT1": txt1, "TEXT2": txt2, "LBL": str(lab)}
                f.write(json.dumps(json_dict, ensure_ascii=False) + "\n")
        elif task_name in AMZ_MULTI_LING or task_name in SINGLE_SENT_DATASETS:
            # Multilingual tasks keep raw Unicode; the other single-sentence
            # tasks keep json.dumps' default ASCII escaping, as before.
            escape_non_ascii = task_name not in AMZ_MULTI_LING
            for txt, lab in zip(text, label):
                json_dict = {"TEXT1": txt, "LBL": str(lab)}
                f.write(
                    json.dumps(json_dict, ensure_ascii=escape_non_ascii) + "\n"
                )
    print("{} split written".format(split))