in src/utils.py [0:0]
def run_ner_linking(texts: typing.List[str], ner_model_path: str):
    """Run the NER + entity-linking model over `texts` and collect entity info.

    Loads the spaCy pipeline found at `ner_model_path`, processes every text
    one at a time (with a progress bar) and records, for each detected
    entity, its surface text, its label and its knowledge-base ID.

    Args:
        texts: Texts to process. Results are keyed by the text itself, so
            duplicate texts end up as a single entry in the result.
        ner_model_path: Name or path handed to `spacy.load`.

    Returns:
        A dict mapping each text to a list with one dict per entity, in
        `doc.ents` order, with the keys:
            "text":  the entity's text,
            "label": the entity's label,
            "id":    the entity's KB ID when it consists of "Q" followed by
                     numeric characters (Wikidata-style), otherwise None.
    """
    nlp = spacy.load(ner_model_path)
    text_to_info = {}
    for text in tqdm(texts, desc="Running Named Entity Linking"):
        doc = nlp(text)
        datum = []
        for e in doc.ents:
            raw_id = e.kb_id_
            # `kb_id_` can be an empty string when the entity has no linked
            # KB entry; `startswith` copes with that, whereas indexing
            # `raw_id[0]` would raise IndexError and abort the whole run.
            is_wikidata_id = raw_id.startswith("Q") and raw_id[1:].isnumeric()
            datum.append(
                {
                    "text": e.text,
                    "label": e.label_,
                    "id": raw_id if is_wikidata_id else None,
                }
            )
        text_to_info[text] = datum
    return text_to_info