def read_csv_file()

in blink/candidate_retrieval/dataset.py [0:0]


def _entity_title(parts):
    """Re-join comma-split title pieces and normalise the result.

    The pieces are joined with "," again (a title may itself contain
    commas), double quotes are replaced by ``%22`` and spaces by ``_``.
    """
    return ",".join(parts).replace('"', "%22").replace(" ", "_")


def read_csv_file(path, added_params):
    """Load mentions from a tab-separated file, grouped by document.

    Every non-blank line is stripped, split on tabs and read positionally:

    * fields 0 and 1, joined by a single space, form the document name;
    * field 2 is the mention, fields 3 and 4 its left and right context;
    * fields 6 up to (but excluding) the last two are candidates, each a
      comma-separated record whose second item is a float score and whose
      items from the third onwards make up the title.  If field 6 is the
      literal ``EMPTYCAND`` the candidate list is empty;
    * the last field is the gold entity, also comma-separated.  Its title
      starts at the third item when the first item is ``"-1"`` and at the
      fourth item otherwise.

    Titles are normalised by ``_entity_title``.  Blank or whitespace-only
    lines (for example a trailing empty line) are skipped.

    Args:
        path: Path of the UTF-8 encoded input file.
        added_params: Mapping that must contain ``"generate_cands"``.  When
            that value is truthy, the candidates stored in the file are
            ignored and ``added_params["cand_generator"].process(mention)``
            supplies them instead.

    Returns:
        A dict mapping each document name to a list (in file order) of
        dicts with the keys ``"mention"``, ``"context"`` (a
        ``(left, right)`` tuple), ``"candidates"`` and ``"gold"`` (a
        ``(title, 1e-5, -1)`` tuple).

    Raises:
        IndexError: If a non-blank line has fewer fields than are read.
        ValueError: If a candidate score read from the file is not a float.
    """
    data = {}
    announce = True  # print the "Generating candidates" notice only once
    with open(path, "r", encoding="utf8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Nothing to parse; indexing the fields below would fail.
                continue

            comps = line.split("\t")
            doc_name = comps[0] + " " + comps[1]
            mention = comps[2]
            lctx = comps[3]
            rctx = comps[4]

            gold_parts = comps[-1].split(",")
            # The gold title begins one item earlier when the record is
            # flagged with a leading "-1".
            title_start = 2 if gold_parts[0] == "-1" else 3
            gold = (_entity_title(gold_parts[title_start:]), 1e-5, -1)

            if added_params["generate_cands"]:
                if announce:
                    print("Generating candidates")
                    announce = False
                # File candidates would be discarded, so they are not parsed.
                cands = added_params["cand_generator"].process(mention)
            elif comps[6] != "EMPTYCAND":
                cands = []
                for record in comps[6:-2]:
                    items = record.split(",")
                    cands.append((_entity_title(items[2:]), float(items[1])))
            else:
                cands = []

            data.setdefault(doc_name, []).append(
                {
                    "mention": mention,
                    "context": (lctx, rctx),
                    "candidates": cands,
                    "gold": gold,
                }
            )
    return data