in blink/main_dense.py [0:0]
def __load_test(test_filename, kb2id, wikipedia_id2local_id, logger):
    """Load test samples from a JSON-lines file and normalise them.

    Each non-blank line of the file is parsed as one JSON object. The record
    must provide the keys ``label_id``, ``context_left``, ``context_right``
    and ``mention``. For every record:

    * ``record["label"]`` is set to ``str(record["label_id"])``.
    * If ``kb2id`` is non-empty, the label is looked up in it and
      ``label_id`` is replaced by the mapped value; records whose label is
      missing from ``kb2id`` are dropped.
    * Otherwise, if ``wikipedia_id2local_id`` is non-empty, the label is
      converted to ``int`` and looked up in it; records whose label is not
      an integer or is missing from the mapping are dropped.
    * If both mappings are empty or ``None``, ``label_id`` is left as is.
    * ``context_left``, ``context_right`` and ``mention`` are lower-cased.

    Args:
        test_filename: Path of the JSON-lines file to read (decoded as UTF-8).
        kb2id: Optional mapping from label string to entity id. Takes
            precedence over ``wikipedia_id2local_id`` when non-empty.
        wikipedia_id2local_id: Optional mapping from integer label to entity
            id, used only when ``kb2id`` is empty or ``None``.
        logger: Optional object with an ``info(message)`` method; when truthy
            it receives a "kept/total samples considered" summary.

    Returns:
        The list of retained record dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    test_samples = []
    total_records = 0

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(test_filename, "r", encoding="utf-8") as fin:
        # Stream the file instead of materialising every line up front.
        for line in fin:
            # Tolerate blank / whitespace-only lines (e.g. trailing newlines).
            if not line.strip():
                continue
            total_records += 1

            record = json.loads(line)
            record["label"] = str(record["label_id"])

            # for tac kbp we should use a separate knowledge source to get
            # the entity id (label_id)
            if kb2id:
                if record["label"] not in kb2id:
                    continue
                record["label_id"] = kb2id[record["label"]]

            # check that each entity id (label_id) is in the entity collection
            elif wikipedia_id2local_id:
                try:
                    key = int(record["label"].strip())
                except ValueError:
                    # Non-numeric label: cannot be in the entity collection.
                    continue
                if key not in wikipedia_id2local_id:
                    continue
                record["label_id"] = wikipedia_id2local_id[key]

            # Lowercase all textual fields of the sample.
            record["context_left"] = record["context_left"].lower()
            record["context_right"] = record["context_right"].lower()
            record["mention"] = record["mention"].lower()
            test_samples.append(record)

    if logger:
        logger.info(
            "{}/{} samples considered".format(len(test_samples), total_records)
        )
    return test_samples