in treeherder/log_parser/artifactbuildercollection.py [0:0]
def parse(self):
    """
    Run every builder against every line of the log, then collect the artifacts.

    The log at ``self.url`` is requested with ``stream=True`` and consumed line
    by line, so each builder builds its ``artifact`` as we go. After the last
    line, every builder is finalised and its artifact is stored in
    ``self.artifacts`` under the builder's name; a ``performance_data``
    artifact that holds no data is skipped.

    Raises:
        LogSizeError: if the ``Content-Length`` reported for the log exceeds
            ``MAX_DOWNLOAD_SIZE_IN_BYTES``.
    """
    with make_request(self.url, stream=True) as response:
        # A missing Content-Length header is recorded as -1.
        download_size_in_bytes = int(response.headers.get("Content-Length", -1))

        # Temporary annotation of log size to help set thresholds in bug 1295997.
        newrelic.agent.add_custom_attribute("unstructured_log_size", download_size_in_bytes)
        newrelic.agent.add_custom_attribute(
            "unstructured_log_encoding", response.headers.get("Content-Encoding", "None")
        )

        if download_size_in_bytes > MAX_DOWNLOAD_SIZE_IN_BYTES:
            raise LogSizeError(f"Download size of {download_size_in_bytes} bytes exceeds limit")

        # Lines must be explicitly decoded since `iter_lines()` returns bytes by default
        # and we cannot use its `decode_unicode=True` mode, since otherwise Unicode newline
        # characters such as `\u0085` (which can appear in test output) are treated the same
        # as `\n` or `\r`, and so split into unwanted additional lines by `iter_lines()`.
        for raw_line in response.iter_lines():
            # Decode once per line rather than once per builder: the result does not
            # depend on the builder. Using `replace` to prevent malformed unicode (which
            # might possibly exist in test message output) from breaking parsing of the
            # rest of the log.
            line = raw_line.decode("utf-8", "replace")
            for builder in self.builders:
                try:
                    builder.parse_line(line)
                except EmptyPerformanceDataError:
                    # Deliberately best-effort: note the empty payload and keep parsing.
                    logger.warning("We have parsed an empty PERFHERDER_DATA for %s", self.url)

    # gather the artifacts from all builders
    for builder in self.builders:
        # Run end-of-parsing actions for this parser,
        # in case the artifact needs clean-up/summarising.
        builder.finish_parse()
        name = builder.name
        artifact = builder.get_artifact()
        # Do not store a performance artifact that holds no data.
        if name == "performance_data" and not artifact[name]:
            continue
        self.artifacts[name] = artifact