in hypernymysuite/evaluation.py [0:0]
def __init__(self, filename, vocabdict, ycolumn="label"):
    """Load a word-pair dataset and precompute in-vocabulary masks.

    Args:
        filename: Passed straight to ``pd.read_table``. The resulting table
            must provide ``word1`` and ``word2`` columns plus ``ycolumn``;
            the ``pos`` and ``fold`` columns are optional.
        vocabdict: Container of known words, queried with ``in``. It must
            contain the reserved token ``"<OOV>"``.
        ycolumn: Name of the column holding the labels.

    Raises:
        ValueError: If ``"<OOV>"`` is not in ``vocabdict``.
    """
    if "<OOV>" not in vocabdict:
        raise ValueError("Reserved word <OOV> must appear in vocabulary.")

    table = pd.read_table(filename)

    # Some things require the part of speech, which may not be explicitly
    # given in the dataset; rows are treated as nouns when it is missing.
    if "pos" not in table.columns:
        table["pos"] = "N"

    # Keep noun rows only. The explicit copy makes the column assignments
    # below write to an independent frame instead of a slice of the raw
    # table, which would otherwise raise pandas' SettingWithCopyWarning.
    table = table[table.pos.str.lower() == "n"].copy()

    # Handle MWEs by replacing the space, and lowercase everything.
    for column in ("word1", "word2"):
        table[column] = table[column].apply(
            lambda word: word.replace(" ", "_").lower()
        )

    # Vocabulary membership is computed on the normalised surface forms,
    # i.e. before the lemmatization step below.
    if vocabdict:
        self.word1_inv = table.word1.apply(vocabdict.__contains__)
        self.word2_inv = table.word2.apply(vocabdict.__contains__)
    else:
        # NOTE(review): for ordinary containers this branch cannot run,
        # because the "<OOV>" check above already requires a non-empty
        # vocabulary. Kept for backward compatibility.
        self.word1_inv = table.word1.apply(lambda x: True)
        self.word2_inv = table.word2.apply(lambda x: True)

    # Always evaluate on lemmas
    for column in ("word1", "word2"):
        table[column] = table[column].apply(lemmatizer.lemmatize)

    self.table = table
    self.labels = np.array(table[ycolumn])

    if "fold" in table:
        self.folds = table["fold"]
    else:
        # No fold column: every row is labelled "test". np.full keeps a
        # string dtype even when the table is empty.
        self.folds = np.full(len(table), "test")

    # oov_mask is not defined in this method; presumably it is provided
    # elsewhere on the class and derived from word1_inv / word2_inv.
    self.table["is_oov"] = self.oov_mask