def extract_entities()

in {{cookiecutter.project_slug}}/app/spacy_extractor.py [0:0]


    def extract_entities(self, records: List[Dict[str, str]]):
        """Run the loaded pipeline over a batch of records and group the hits.

        records (list): The "document" dictionaries to process. For each one
            the document id is read from the `self.input_id_col` key and the
            text from the `self.input_text_col` key.

        RETURNS (list): One `{"id": ..., "entities": [...]}` dictionary per
            processed document. Each entity has a `name`, a `label` and the
            list of `matches` (start/end character offsets plus the matched
            text) collected for it within that document.
        """
        # Both inputs stay lazy so ids and texts are consumed in lockstep
        # while the pipeline streams through the batch.
        doc_ids = (record[self.input_id_col] for record in records)
        parsed_docs = self.nlp.pipe(
            record[self.input_text_col] for record in records
        )

        output = []
        for doc_id, parsed in zip(doc_ids, parsed_docs):
            # Entities of the current document, keyed by their resolved id so
            # repeated mentions end up under a single entry.
            grouped = {}
            for span in parsed.ents:
                # Take `kb_id` first, then `ent_id`, and only derive an id
                # from the mention text when both of them are falsy.
                key = span.kb_id or span.ent_id or self._name_to_id(span.text)

                entry = grouped.get(key)
                if entry is None:
                    surface = span.text
                    # A mention with no uppercase characters gets a
                    # capitalised display name; anything else is kept as-is.
                    display = (
                        surface.capitalize()
                        if surface.lower() == surface
                        else surface
                    )
                    entry = grouped[key] = {
                        "name": display,
                        "label": span.label_,
                        "matches": [],
                    }

                entry["matches"].append(
                    {
                        "start": span.start_char,
                        "end": span.end_char,
                        "text": span.text,
                    }
                )

            output.append({"id": doc_id, "entities": list(grouped.values())})
        return output