def _find_trailing_overlapping_tokens_start_index()

in src/processors.py [0:0]


    def _find_trailing_overlapping_tokens_start_index(self, text):
        """Return the index in ``text`` where the trailing overlap begins.

        Walks backwards from the end of ``text`` counting space-delimited
        words until ``self.overlap_tokens`` words have been collected, the
        overlap spans ``self.max_overlapping_chars`` characters, or the start
        of ``text`` is reached, whichever comes first.

        Args:
            text: The string whose trailing words should be located.

        Returns:
            A non-negative index ``k`` into ``text``:

            * when a word boundary stopped the scan, ``k`` is the position of
              the space immediately preceding the first overlapping word;
            * when the character cap stopped the scan, ``k`` is the position
              at which ``len(text) - k`` reached the cap (possibly mid-word);
            * ``0`` when the scan ran into the start of ``text`` or ``text``
              has fewer than two characters;
            * ``len(text) - 1`` when ``self.overlap_tokens`` is not positive
              and ``text`` has at least two characters.
        """
        original_length = len(text)
        # Fewer than two characters leaves no (k, k + 1) pair to inspect;
        # scanning would index out of range or return a negative position.
        if original_length < 2:
            return 0

        word_count = 0
        k = original_length - 1
        while word_count < self.overlap_tokens:
            k -= 1
            # Moving backwards: stop on the space right before a word, i.e.
            # the current character is a space and the next one is not.
            while (
                k > 0
                and original_length - k < self.max_overlapping_chars
                and not (text[k] == ' ' and text[k + 1] != ' ')
            ):
                k -= 1
            word_count += 1
            if k == 0:
                LOG.debug("Overlapping tokens for the next sentence starts beyond the current sentence")
                break
            # Once the character cap is reached, stop entirely; otherwise each
            # remaining token would push k back one more character past the cap.
            if original_length - k >= self.max_overlapping_chars:
                break
        return k