def _process_line()

in pipeline/data/hplt.py [0:0]


    def _process_line(self, line_locale: str, line: str):
        """Filter one line, then either emit it directly or merge it into the buffer.

        The line is rejected when ``line_locale`` differs from ``self.hplt_locale``
        or when ``len(line)`` exceeds ``self.max_characters``. A rejection bumps
        the matching ``self.stats`` counter and calls
        ``self._maybe_write_accumulated_text()`` before returning.

        An accepted line increments ``self.stats.visited_lines.kept``. With
        ``self.merge_lines`` disabled the line replaces ``self.accumulated_text``
        and is handed to ``_maybe_write_accumulated_text()`` right away. With
        merging enabled the line is appended, space-separated, to
        ``self.accumulated_text`` and ``self.cumulative_char_count`` is advanced.

        NOTE(review): this method never resets ``accumulated_text`` or
        ``cumulative_char_count`` itself; presumably
        ``_maybe_write_accumulated_text()`` does so - confirm against that helper.

        Args:
            line_locale: Locale reported for ``line``.
            line: The text of the line; its length is measured with ``len()``.
        """
        # Wrong language: count it and hand off whatever has been collected.
        if line_locale != self.hplt_locale:
            self.stats.filtered_line_locale.value += 1
            self._maybe_write_accumulated_text()
            return

        line_length = len(line)

        # Oversized segment: count it and hand off whatever has been collected.
        if line_length > self.max_characters:
            self.stats.filtered_too_long.value += 1
            self._maybe_write_accumulated_text()
            return

        # The line passed both filters, so it is kept whichever mode is active.
        self.stats.visited_lines.kept += 1

        # Merging disabled: the buffer only ever holds the current line.
        if not self.merge_lines:
            self.accumulated_text = line
            self._maybe_write_accumulated_text()
            return

        # Merging enabled. The extra 1 stands for the joining space, and it is
        # included even when nothing has been accumulated yet.
        projected_length = self.cumulative_char_count + line_length + 1
        if projected_length > self.max_characters:
            # Appending would exceed the limit; hand off the buffer first.
            self._maybe_write_accumulated_text()

        self.cumulative_char_count += line_length

        # Read the buffer only after the possible hand-off above.
        pending_text = self.accumulated_text
        if not pending_text:
            # Nothing buffered: this line starts a new accumulated segment.
            self.accumulated_text = line
            return

        self.accumulated_text = f"{pending_text} {line}"
        # Account for the separating space that was just inserted.
        self.cumulative_char_count += 1