def __init__()

in blink/candidate_retrieval/dataset.py [0:0]


    def __init__(self, path, person_path, conll_path, added_params):
        """Load all dataset splits and run the coref and CoNLL passes on them.

        Each split is stored as an attribute on the instance (``train``,
        ``testA``, ``testB``, ``ace2004``, ``aquaint``, ``clueweb``, ``msnbc``,
        ``wikipedia``) holding whatever ``read_csv_file`` returns for it.

        Args:
            path: Prefix to which the per-split CSV file names are appended.
            person_path: Forwarded unchanged to ``load_person_names``.
            conll_path: Prefix to which the per-split CoNLL paths are appended.
            added_params: Option mapping. ``"generate_cands"`` and
                ``"generate_ments_and_cands"`` are read here, and the mapping
                is forwarded to ``get_candidate_generator`` and
                ``read_csv_file``.

        Note:
            ``added_params`` is modified in place: ``"generate_cands"`` is
            forced to ``False`` when ``"generate_ments_and_cands"`` is truthy,
            and ``"cand_generator"`` is stored when either flag is set.
        """
        # "mentions and candidates" mode takes precedence over plain
        # candidate generation, but both modes need a generator.
        if added_params["generate_ments_and_cands"]:
            added_params["generate_cands"] = False
            needs_generator = True
        else:
            needs_generator = bool(added_params["generate_cands"])

        if needs_generator:
            added_params["cand_generator"] = get_candidate_generator(added_params)

        print(added_params)

        # (attribute name, CSV path suffix), in load order. The coref pass
        # below walks the splits in this same order.
        csv_by_split = (
            ("train", "/aida_train.csv"),
            ("testA", "/aida_testA.csv"),
            ("testB", "/aida_testB.csv"),
            ("ace2004", "/wned-ace2004.csv"),
            ("aquaint", "/wned-aquaint.csv"),
            ("clueweb", "/wned-clueweb.csv"),
            ("msnbc", "/wned-msnbc.csv"),
            ("wikipedia", "/wned-wikipedia.csv"),
        )

        print("load csv")
        for split, csv_suffix in csv_by_split:
            setattr(self, split, read_csv_file(path + csv_suffix, added_params))
        # Remove this single entry if it exists; the reason is not visible
        # from this method (NOTE(review): presumably a known-bad document).
        self.wikipedia.pop("Jiří_Třanovský Jiří_Třanovský", None)

        print("process coref")
        person_names = load_person_names(person_path)
        for split, _ in csv_by_split:
            with_coref(getattr(self, split), person_names)

        # (attribute name, CoNLL path suffix), in processing order. testA and
        # testB read the same aggregate file; msnbc precedes clueweb here.
        conll_by_split = (
            ("train", "/AIDA/aida_train.txt"),
            ("testA", "/AIDA/testa_testb_aggregate_original"),
            ("testB", "/AIDA/testa_testb_aggregate_original"),
            ("ace2004", "/wned-datasets/ace2004/ace2004.conll"),
            ("aquaint", "/wned-datasets/aquaint/aquaint.conll"),
            ("msnbc", "/wned-datasets/msnbc/msnbc.conll"),
            ("clueweb", "/wned-datasets/clueweb/clueweb.conll"),
            ("wikipedia", "/wned-datasets/wikipedia/wikipedia.conll"),
        )

        print("load conll")
        for split, conll_suffix in conll_by_split:
            read_conll_file(getattr(self, split), conll_path + conll_suffix)

        # The lowercase-fallback statistics are only reported in plain
        # candidate-generation mode.
        if not added_params["generate_cands"]:
            return

        generator = added_params["cand_generator"]
        print(
            "Number of candidates not present in p_e_m originally, but present when lowercased",
            len(generator.lower_org),
        )
        print(
            "Number of candidates not present in p_e_m originally, but present in p_e_m_lower when lowercased ",
            len(generator.lower_lower),
        )