def jsonl_from_dataset()

in scripts/adapet/ADAPET/setfit_adapet.py [0:0]


def jsonl_from_dataset(dataset, task_name, updated_args, split="train"):
    """Write one split of ``dataset`` to ``<data_dir>/<split>.jsonl``.

    Each example becomes one JSON object per line. The label is always
    stored as a string under ``"LBL"``.

    * Tasks in ``GLUE_DATASETS`` read the ``"text1"`` and ``"text2"``
      columns and emit ``{"TEXT1", "TEXT2", "LBL"}``.
    * Tasks in ``AMZ_MULTI_LING`` or ``SINGLE_SENT_DATASETS`` read the
      ``"text"`` column and emit ``{"TEXT1", "LBL"}``.
    * Any other task still requires the ``"text"`` and ``"label"`` columns
      but produces an empty file.

    Args:
        dataset: Mapping-like object indexable by column name. The columns
            are assumed to be equal-length sequences (as with a
            ``datasets.Dataset``) -- TODO confirm against callers.
        task_name: Name used to select the output layout.
        updated_args: Object exposing a ``data_dir`` attribute (a string
            path); the directory is created if missing.
        split: Name of the split; used for the file name and log messages.

    Raises:
        KeyError: If a required column is missing from ``dataset``
            (assuming it raises ``KeyError`` on unknown keys).
        OSError: If the directory or file cannot be created or written.
    """
    is_pair_task = task_name in GLUE_DATASETS
    if is_pair_task:
        text1 = dataset["text1"]
        text2 = dataset["text2"]
    else:
        text = dataset["text"]
    label = dataset["label"]

    data_dir = updated_args.data_dir
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(data_dir, exist_ok=True)
    writefile = os.path.join(data_dir, split + ".jsonl")

    try:
        os.remove(writefile)
        print("removing old {} file".format(split))
    except OSError:
        # Best effort: the file is truncated on open below regardless.
        print("no old {} files found".format(split))

    print("writing new {} file".format(split))
    # "w" (not "a") guarantees a fresh file even if the removal above failed;
    # explicit UTF-8 is required because ensure_ascii=False emits raw
    # non-ASCII characters that the platform default encoding may reject.
    with open(writefile, "w", encoding="utf-8") as f:
        if is_pair_task:
            for txt1, txt2, lab in zip(text1, text2, label):
                json_dict = {"TEXT1": txt1, "TEXT2": txt2, "LBL": str(lab)}
                f.write(json.dumps(json_dict, ensure_ascii=False) + "\n")
        elif task_name in AMZ_MULTI_LING:
            for txt, lab in zip(text, label):
                json_dict = {"TEXT1": txt, "LBL": str(lab)}
                f.write(json.dumps(json_dict, ensure_ascii=False) + "\n")
        elif task_name in SINGLE_SENT_DATASETS:
            for txt, lab in zip(text, label):
                json_dict = {"TEXT1": txt, "LBL": str(lab)}
                # NOTE(review): this branch keeps the default ASCII-escaped
                # output, unlike the two above; both forms decode to the
                # same strings. Confirm whether the difference is intended.
                f.write(json.dumps(json_dict) + "\n")
    print("{} split written".format(split))