in distant_supervision/synthetic_data_creator.py [0:0]
def _get_hit_phrases(self, es_hit):
    """Return the set of phrases extracted from one search hit.

    The ``entities`` and ``noun_chunks`` fields under ``es_hit['_source']``
    are JSON-decoded, and every decoded item is converted to a tuple.
    The results are handed to ``self.text_preprocessor.get_phrases``.
    When ``self.phrase_mode`` is ``PhraseMode.NER_ONLY`` an empty list is
    passed in place of the noun chunks.

    Args:
        es_hit: Mapping with a ``'_source'`` entry holding JSON-encoded
            ``'entities'`` and ``'noun_chunks'`` values.

    Returns:
        A ``set`` built from whatever ``get_phrases`` yields.
    """
    source = es_hit['_source']

    # Both fields are decoded unconditionally, so a missing or malformed
    # 'noun_chunks' value raises regardless of the phrase mode.
    entity_items = list(map(tuple, json.loads(source['entities'])))
    chunk_items = list(map(tuple, json.loads(source['noun_chunks'])))

    ner_only = self.phrase_mode is PhraseMode.NER_ONLY
    return set(
        self.text_preprocessor.get_phrases(
            entities=entity_items,
            noun_chunks=[] if ner_only else chunk_items,
        )
    )