def de_segment()

in src/processors.py [0:0]


    def de_segment(self, segments: List[Document]) -> Document:
        """
        Merge text segments back into a single document.

        Segments are processed in ascending ``char_offset`` order. For each one:

        1. Its pii classification is folded into the running result by
           ``_merge_classifcation_results``. Per the original contract, the maximum
           threshold seen for an entity across segments becomes that entity's
           threshold in the merged document.
        2. Its pii entity annotations are relocated by the segment's ``char_offset``
           via ``_relocate_annotation`` and folded into the running list by
           ``_merge_pii_annotation_results``. Per the original contract, for
           conflicting spans the annotation with the higher confidence wins.
        3. Only the part of its text that extends past the text merged so far is
           appended, so text shared by overlapping segments appears once.

        The caller's ``segments`` list is not reordered.

        :param segments: segments to merge; each must expose ``text``, ``char_offset``,
            ``pii_entities`` and ``pii_classification``.
        :return: a ``Document`` with ``char_offset=0`` holding the merged text,
            classification and entity annotations.
        """
        text_parts = []
        merged_length = 0
        pii_classification = {}
        pii_entities = []
        # sorted() rather than list.sort(): do not mutate the caller's list as a side effect.
        for segment in sorted(segments, key=lambda x: x.char_offset):
            offset_adjusted_segment = Document(
                text=segment.text,
                char_offset=segment.char_offset,
                pii_entities=self._relocate_annotation(segment.pii_entities, segment.char_offset),
                pii_classification=segment.pii_classification,
            )
            self._merge_classifcation_results(segment, pii_classification)
            self._merge_pii_annotation_results(offset_adjusted_segment, pii_entities)
            # Number of leading characters of this segment already present in the merged text.
            # Clamp at 0: if the segment starts beyond the merged text (a gap, or a first
            # segment with a non-zero offset), a negative slice start would wrap around and
            # silently drop the head of the segment.
            # NOTE(review): a gap is not padded, so in that case the merged text is shorter
            # than the segment offsets imply - confirm callers always pass contiguous segments.
            already_merged = max(0, merged_length - segment.char_offset)
            new_text = segment.text[already_merged:]
            text_parts.append(new_text)
            merged_length += len(new_text)
        return Document(
            text="".join(text_parts),
            char_offset=0,
            pii_classification=pii_classification,
            pii_entities=pii_entities,
        )