def _iter_log_entries()

in tracking/translations_parser/parser.py [0:0]


    def _iter_log_entries(self) -> Iterator[tuple[list[tuple[str]], str]]:
        """
        Inner method to iterate on log lines passed to
        the parser, differentiating headers and text.
        Automatically set Marian run date when found.

        Each line read from `self.logs_iter` is echoed to stdout, split into
        headers and text using `self.get_headers`, then recorded in
        `self.parsed_logs`. Recorded lines are prefixed with `[<tag>]` when the
        line carries headers beyond the two leading timestamps.

        Lines rejected by `self.log_filter` are skipped entirely: they are
        still echoed and counted in `self._current_index`, but are neither
        recorded in `self.parsed_logs` nor yielded.

        Yields:
            A `(headers, text)` pair for every line that passes the filter,
            where `text` is the part of the line after the headers.
        """

        def _flatten(items):
            # Walk nested header groups depth-first, yielding only the string
            # leaves. Strings are treated as leaves (never iterated character
            # by character) and `None` entries are dropped wherever they appear.
            for item in items:
                if item is None:
                    continue
                if isinstance(item, str):
                    yield item
                else:
                    yield from _flatten(item)

        for line in self.logs_iter:
            # When reading stdin stream, propagate raw lines to stdout
            # and force flush on stdout to make sure every line gets displayed
            sys.stdout.buffer.write(line.encode("utf-8"))
            sys.stdout.buffer.flush()

            self._current_index += 1
            headers, position = self.get_headers(line)

            if self.log_filter and not self.log_filter(headers):
                logger.debug(
                    f"Skipping line {self._current_index} : Headers does not match the filter"
                )
                continue

            if self.run_date is None:
                # Try to fill run date from log headers
                self.run_date = self.get_timestamp(headers)

            text = line[position:]

            # Record logs depending on Marian headers.
            # The 2 first headers are ignored (task timestamp, then marian timestamp);
            # whatever remains is merged into a single "_"-separated tag.
            tag = "_".join(_flatten(headers[2:]))
            if tag:
                # Prefix the recorded line with the actual tag value
                self.parsed_logs.append(f"[{tag}] {text}")
            else:
                self.parsed_logs.append(text)

            yield headers, text