in distant_supervision/ner_entity_gatherer.py [0:0]
def _get_unique_entity_pairs(self, article):
    """
    Collect the distinct (phrase, category) pairs found in an article.

    Each sentence in ``article.sents`` is asked for its phrases through
    ``get_phrases(self.phrase_mode)``. Phrases whose text has fewer than
    ``ENTITY_NCHARS_LLIM`` characters are dropped, and repeated pairs are
    collapsed into one.

    :param article: object exposing an iterable ``sents`` attribute
    :return: a list of unique pairs, e.g. [("0 to 6 years", "DATE"), ...];
        the list is built from a set, so its ordering carries no meaning
    """
    unique_pairs = set()
    for sentence in article.sents:
        # keep only entities that reach the minimum number of characters
        unique_pairs.update(
            (text, category)
            for (text, category) in sentence.get_phrases(self.phrase_mode)
            if len(text) >= ENTITY_NCHARS_LLIM
        )
    return list(unique_pairs)