def load_few_shot_file()

in src/hf/run_ner.py [0:0]


def load_few_shot_file(dataset_name, few_shot_seed, features=None):
    """Load a few-shot NER split from disk and return it as a ``Dataset``.

    Two parallel, whitespace-tokenised text files are read from
    ``FEW_SHOT_DIR/<dataset_name>/``:

    * ``few_shot_5_<few_shot_seed>.words`` -- one sentence per line.
    * ``few_shot_5_<few_shot_seed>.ner`` -- one tag sequence per line.

    Args:
        dataset_name: Sub-directory of ``FEW_SHOT_DIR`` holding the files.
            The value ``"wnut_17"`` additionally triggers an underscore to
            hyphen rewrite of the tags read from the ``.ner`` file.
        few_shot_seed: Value interpolated into the two file names.
        features: Mapping whose ``"ner_tags"`` entry exposes
            ``.feature.names`` (the ordered list of label strings). It is
            used to convert tag strings to integer ids and is forwarded to
            ``Dataset.from_dict``.

    Returns:
        The result of ``Dataset.from_dict`` with the columns ``id``
        (``"fewshot-train-<line index>"``), ``tokens`` and ``ner_tags``.

    Raises:
        NotImplementedError: If ``features`` is ``None`` or empty; loading
            without a label list is not supported.
        KeyError: If the ``.ner`` file contains a tag that is not in the
            label list.
    """
    # Fail fast: without a label list the tags cannot be mapped to ids, so
    # there is no point in touching the file system first.
    if not features:
        raise NotImplementedError(
            "load_few_shot_file requires `features` to map NER tags to label ids"
        )

    ner_file = os.path.join(FEW_SHOT_DIR, dataset_name, f"few_shot_5_{few_shot_seed}.ner")
    word_file = os.path.join(FEW_SHOT_DIR, dataset_name, f"few_shot_5_{few_shot_seed}.words")

    # Explicit encoding so that decoding does not depend on the platform's
    # locale; str.split() with no argument already ignores the trailing
    # newline and surrounding whitespace.
    with open(word_file, encoding="utf-8") as f:
        tokens = [line.split() for line in f]
    ids = [f"fewshot-train-{i}" for i in range(len(tokens))]

    label_list = features["ner_tags"].feature.names
    label_to_id = {label: i for i, label in enumerate(label_list)}
    print(label_list)

    # NOTE(review): presumably the wnut_17 few-shot files spell tags with
    # underscores while the label list uses hyphens -- confirm against the data.
    rewrite_underscores = dataset_name == "wnut_17"

    ner_tags = []
    with open(ner_file, encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            tags = line.split()
            if rewrite_underscores:
                tags = [t.replace("_", "-") for t in tags]
            try:
                ner_tags.append([label_to_id[t] for t in tags])
            except KeyError as err:
                # Same exception type as a plain lookup failure, but tell the
                # user which tag and which line of which file is at fault.
                raise KeyError(
                    f"unknown NER tag {err.args[0]!r} at {ner_file}:{line_no}; "
                    f"expected one of {label_list}"
                ) from None

    return Dataset.from_dict({"id": ids, "tokens": tokens, "ner_tags": ner_tags}, features=features)