in tracking/translations_parser/parser.py [0:0]
def _iter_log_entries(self) -> Iterator[tuple[list[tuple[str]], str]]:
    """
    Iterate over the log lines passed to the parser, splitting each
    line into its headers and the text that follows them.

    For every line read from ``self.logs_iter``:
      * the raw line is echoed to stdout (UTF-8 encoded) and flushed,
      * ``self._current_index`` is incremented,
      * the line is skipped when ``self.log_filter`` is set and rejects
        the line's headers,
      * ``self.run_date`` is filled from ``self.get_timestamp(headers)``
        as long as it is still ``None``,
      * the text is appended to ``self.parsed_logs``, prefixed with
        ``[<tag>]`` when headers beyond the first two yield a non-empty
        tag (header values joined with ``_``).

    Yields:
        ``(headers, text)`` for each line that was not filtered out.
        ``headers`` is the first value returned by ``self.get_headers``
        and ``text`` is the line content from the returned position on.
    """

    def _flatten(items):
        # Yield every string found in arbitrarily nested header groups,
        # dropping ``None`` placeholders and empty groups on the way.
        for item in items:
            if item is None:
                continue
            if isinstance(item, str):
                yield item
            else:
                yield from _flatten(item)

    for line in self.logs_iter:
        # When reading stdin stream, propagate raw lines to stdout
        # and force flush on stdout to make sure every line gets displayed
        sys.stdout.buffer.write(line.encode("utf-8"))
        sys.stdout.buffer.flush()

        self._current_index += 1
        headers, position = self.get_headers(line)

        if self.log_filter and not self.log_filter(headers):
            # Lazy %-formatting: the message is only built if debug is enabled
            logger.debug(
                "Skipping line %s : Headers does not match the filter",
                self._current_index,
            )
            continue

        if self.run_date is None:
            # Try to fill run date from log headers
            self.run_date = self.get_timestamp(headers)

        text = line[position:]

        # Record logs depending on Marian headers
        tag = None
        if len(headers) >= 2:
            # The 2 first headers are ignored (task timestamp, then marian timestamp)
            _, _, *marian_tags = headers
            # An empty result is falsy, so untagged lines are recorded as-is
            tag = "_".join(_flatten(marian_tags))

        if tag:
            # Interpolate the actual tag (previously the literal "[tag]" was written)
            self.parsed_logs.append(f"[{tag}] {text}")
        else:
            self.parsed_logs.append(text)

        yield headers, text