def _get_unique_entity_pairs()

in distant_supervision/ner_entity_gatherer.py [0:0]


    def _get_unique_entity_pairs(self, article):
        """
        Collect the distinct (phrase, category) pairs found across an article.

        Each sentence in ``article.sents`` is asked for its phrases via
        ``get_phrases(self.phrase_mode)``; duplicates are collapsed and any
        phrase shorter than ENTITY_NCHARS_LLIM characters is discarded.

        :param article: object whose ``sents`` attribute iterates over sentences
        :return: a list of pairs [("0 to 6 years", "DATE"), ...]; the pairs are
                 de-duplicated through a set, so their order is not meaningful
        """
        unique_pairs = set()
        for sentence in article.sents:
            # Keep only entities that reach the minimum character count.
            unique_pairs.update(
                (text, category)
                for (text, category) in sentence.get_phrases(self.phrase_mode)
                if len(text) >= ENTITY_NCHARS_LLIM
            )
        return list(unique_pairs)