in src/hf/run_ner.py [0:0]
def load_few_shot_file(dataset_name, few_shot_seed, features=None):
    """Load a few-shot training split from its ``.words`` / ``.ner`` file pair.

    Reads ``few_shot_5_{few_shot_seed}.words`` and
    ``few_shot_5_{few_shot_seed}.ner`` from ``FEW_SHOT_DIR/<dataset_name>/``.
    Each line of either file is treated as one example made of
    whitespace-separated items (tokens in ``.words``, tag names in ``.ner``).
    Tag names are converted to integer ids using the position of each name in
    ``features["ner_tags"].feature.names``.

    Args:
        dataset_name: Sub-directory of ``FEW_SHOT_DIR`` holding the files.
            For ``"wnut_17"`` every ``_`` in a tag is replaced by ``-``
            before the label lookup.
        few_shot_seed: Value interpolated into the two file names.
        features: Schema object; must be truthy and expose the label names
            via ``features["ner_tags"].feature.names``. It is also forwarded
            to ``Dataset.from_dict``.
            NOTE(review): presumably a ``datasets.Features`` instance -- confirm.

    Returns:
        The result of ``Dataset.from_dict`` with the columns ``id``
        (``"fewshot-train-<line index>"``), ``tokens`` and ``ner_tags``.

    Raises:
        NotImplementedError: If ``features`` is ``None`` or empty; loading
            without a label schema is not supported.
        KeyError: If a tag in the ``.ner`` file is not one of the label names.
    """
    # Fail fast: without a label schema the tags cannot be mapped to ids, so
    # there is no point in touching the filesystem first.
    if not features:
        raise NotImplementedError(
            "load_few_shot_file requires `features` to map NER tags to label ids"
        )

    base_name = f"few_shot_5_{few_shot_seed}"
    ner_file = os.path.join(FEW_SHOT_DIR, dataset_name, f"{base_name}.ner")
    word_file = os.path.join(FEW_SHOT_DIR, dataset_name, f"{base_name}.words")

    ids = []
    tokens = []
    # Explicit UTF-8 so non-ASCII tokens decode the same on every platform
    # instead of depending on the locale's default encoding.
    with open(word_file, encoding="utf-8") as f:
        for i, line in enumerate(f):
            tokens.append(line.strip().split())
            ids.append(f"fewshot-train-{i}")

    label_list = features["ner_tags"].feature.names
    label_to_id = {label: i for i, label in enumerate(label_list)}
    print(label_list)

    ner_tags = []
    with open(ner_file, encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            tags = line.strip().split()
            if dataset_name == "wnut_17":
                # Presumably the wnut_17 few-shot files spell tags with "_"
                # where the label names use "-"; normalise before the lookup.
                tags = [t.replace("_", "-") for t in tags]
            try:
                tag_ids = [label_to_id[t] for t in tags]
            except KeyError as err:
                # Same exception type as before, but say where the bad tag is.
                raise KeyError(
                    f"unknown NER tag {err.args[0]!r} at {ner_file}:{line_no}; "
                    f"known labels: {label_list}"
                ) from None
            ner_tags.append(tag_ids)

    return Dataset.from_dict(
        {"id": ids, "tokens": tokens, "ner_tags": ner_tags},
        features=features,
    )