in blink/candidate_retrieval/dataset.py [0:0]
def _csv_title_to_entity(parts):
    """Re-join comma-split title pieces and normalise them to an entity name.

    Double quotes become ``%22`` and spaces become underscores.
    """
    # A title may itself contain commas, so the pieces produced by splitting
    # the whole field on "," are glued back together before escaping.
    return ",".join(parts).replace('"', "%22").replace(" ", "_")


def read_csv_file(path, added_params):
    """Read a tab-separated mention file and group its mentions by document.

    Each non-blank line is split on tabs and interpreted as follows:

    * fields 0 and 1 -- joined with a single space to form the document key;
    * field 2 -- the mention string;
    * fields 3 and 4 -- left and right context;
    * fields 6 up to (not including) the second-to-last -- candidates, each a
      comma-separated record whose second item is parsed as a float score and
      whose items from the third onward form the title.  If field 6 is the
      literal ``EMPTYCAND`` the candidate list is empty;
    * last field -- the gold record, comma-separated.  When its first item is
      ``"-1"`` the title starts at the third item, otherwise at the fourth.

    Field 5 and the second-to-last field are not read (presumably section
    markers -- confirm against the data files).

    Blank or whitespace-only lines are skipped.

    Args:
        path: Path of the UTF-8 encoded file to read.
        added_params: Mapping with key ``"generate_cands"``; when that value
            is truthy, ``added_params["cand_generator"].process(mention)``
            replaces the candidates parsed from the file.

    Returns:
        A dict mapping each document key to a list of dicts with the keys
        ``"mention"``, ``"context"`` (a ``(left, right)`` tuple),
        ``"candidates"`` (a list of ``(title, score)`` tuples, or whatever the
        candidate generator returned) and ``"gold"`` (a
        ``(title, 1e-5, -1)`` tuple).

    Raises:
        IndexError: If a non-blank line has fewer fields than expected.
        ValueError: If a candidate score cannot be parsed as a float.
    """
    data = {}
    info = True  # makes the "Generating candidates" notice print only once
    with open(path, "r", encoding="utf8") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # A stray empty line (e.g. at end of file) carries no mention;
                # indexing into its single empty field would raise IndexError.
                continue

            comps = stripped.split("\t")
            doc_name = comps[0] + " " + comps[1]
            mention = comps[2]
            lctx = comps[3]
            rctx = comps[4]

            if comps[6] != "EMPTYCAND":
                # The last two fields belong to the gold section, hence [6:-2].
                cands = [
                    (_csv_title_to_entity(c[2:]), float(c[1]))
                    for c in (field.split(",") for field in comps[6:-2])
                ]
            else:
                cands = []

            gold = comps[-1].split(",")
            # A gold record starting with "-1" has one item fewer before the
            # title than any other gold record.
            title_start = 2 if gold[0] == "-1" else 3
            gold = (_csv_title_to_entity(gold[title_start:]), 1e-5, -1)

            if added_params["generate_cands"]:
                if info:
                    print("Generating candidates")
                    info = False
                cands = added_params["cand_generator"].process(mention)

            data.setdefault(doc_name, []).append(
                {
                    "mention": mention,
                    "context": (lctx, rctx),
                    "candidates": cands,
                    "gold": gold,
                }
            )
    return data