def compute_ner_and_noun_chunks()

in distant_supervision/text_preprocessor.py [0:0]


    def compute_ner_and_noun_chunks(self, text):
        """
        Extract named entities and noun chunks from *text* via spaCy.

        https://spacy.io/usage/linguistic-features#noun-chunks

        ents: [('today', 'DATE'), ('Patrick', 'PERSON')]
        noun_chunks: e.g. [('Autonomous cars', 'nsubj'), ('insurance liability', 'dobj')]

        :return: (ents, noun_chunks) — each a sorted, de-duplicated list of
                 (text, label) / (text, dependency) tuples; both empty when
                 the text exceeds the per-sentence character limit.
        """
        # Bail out early on over-long input to bound parsing cost.
        if len(text) > ULIM_CHAR_PER_SENTENCE:
            return [], []

        # spacy has memory leaks: https://github.com/explosion/spaCy/issues/3618
        pipeline = SpacyMagic.load('my_english', 'en_core_web_sm', disable=[])
        parsed = pipeline(text)

        # Dedupe with sets, then sort for deterministic output.
        entity_pairs = {(e.text, e.label_) for e in parsed.ents}
        chunk_pairs = {(c.text, c.root.dep_) for c in parsed.noun_chunks}

        return sorted(entity_pairs), sorted(chunk_pairs)