def __init__()

in hypernymysuite/evaluation.py [0:0]


    def __init__(self, filename, vocabdict, ycolumn="label"):
        """Load an evaluation table and precompute vocabulary membership.

        The file is read with ``pd.read_table``, restricted to rows whose
        ``pos`` is ``n``/``N`` (a ``pos`` column of ``"N"`` is added when the
        file has none), and the ``word1``/``word2`` columns are normalised:
        spaces become underscores, text is lower-cased, and finally each
        word is passed through ``lemmatizer.lemmatize``.

        Args:
            filename: Path (or buffer) handed to ``pd.read_table``. The table
                must provide ``word1``, ``word2`` and the ``ycolumn`` column.
            vocabdict: Container supporting ``in``; it must contain the
                reserved word ``"<OOV>"``.
            ycolumn: Name of the column stored in ``self.labels``.

        Raises:
            ValueError: If ``"<OOV>"`` is not in ``vocabdict``.

        Sets ``self.table``, ``self.labels``, ``self.folds``,
        ``self.word1_inv`` and ``self.word2_inv``, and adds an ``is_oov``
        column to ``self.table`` taken from ``self.oov_mask``.
        """
        if "<OOV>" not in vocabdict:
            raise ValueError("Reserved word <OOV> must appear in vocabulary.")

        table = pd.read_table(filename)

        # some things require the part of speech, which may not be explicitly
        # given in the dataset.
        if "pos" not in table.columns:
            table["pos"] = "N"
        # Take an explicit copy of the filtered rows: the column assignments
        # below would otherwise be chained assignments on an indexing result
        # and trigger pandas' SettingWithCopyWarning.
        table = table[table.pos.str.lower() == "n"].copy()

        word_columns = ("word1", "word2")

        # Handle MWEs by replacing the space
        for column in word_columns:
            table[column] = table[column].apply(
                lambda x: x.replace(" ", "_").lower()
            )

        # Vocabulary membership is decided on the normalised surface forms,
        # i.e. before lemmatisation.
        if vocabdict:
            self.word1_inv = table.word1.apply(vocabdict.__contains__)
            self.word2_inv = table.word2.apply(vocabdict.__contains__)
        else:
            # Falsy vocabulary: treat every word as in-vocabulary.
            # NOTE(review): for a plain dict/set this branch looks unreachable,
            # because the "<OOV>" check above already requires a non-empty
            # container -- confirm whether other vocab types rely on it.
            self.word1_inv = table.word1.apply(lambda x: True)
            self.word2_inv = table.word2.apply(lambda x: True)

        # Always evaluate on lemmas
        for column in word_columns:
            table[column] = table[column].apply(lemmatizer.lemmatize)

        self.table = table
        self.labels = np.array(table[ycolumn])
        if "fold" in table:
            self.folds = table["fold"]
        else:
            # No fold information in the file: every row counts as "test".
            self.folds = np.array(["test"] * len(self.table))

        # oov_mask is defined outside this method; it is read only after the
        # word*_inv attributes above have been set.
        self.table["is_oov"] = self.oov_mask