def parse()

in treeherder/log_parser/artifactbuildercollection.py [0:0]


    def parse(self):
        """
        Stream the log at ``self.url`` and run every builder against each line.

        The response is requested with ``stream=True`` and consumed line by
        line. Each line is decoded once and handed to ``parse_line()`` of every
        builder in ``self.builders``. Once the stream is exhausted, each
        builder's ``finish_parse()`` is called and its artifact is stored in
        ``self.artifacts`` under the builder's name; a ``performance_data``
        artifact whose payload is empty is skipped.

        The reported download size (``-1`` when the ``Content-Length`` header
        is absent) and the content encoding are recorded as New Relic custom
        attributes.

        Raises:
            LogSizeError: if the ``Content-Length`` header reports more than
                ``MAX_DOWNLOAD_SIZE_IN_BYTES``.
        """
        with make_request(self.url, stream=True) as response:
            headers = response.headers
            download_size_in_bytes = int(headers.get("Content-Length", -1))

            # Temporary annotation of log size to help set thresholds in bug 1295997.
            newrelic.agent.add_custom_attribute("unstructured_log_size", download_size_in_bytes)
            newrelic.agent.add_custom_attribute(
                "unstructured_log_encoding", headers.get("Content-Encoding", "None")
            )

            if download_size_in_bytes > MAX_DOWNLOAD_SIZE_IN_BYTES:
                raise LogSizeError(f"Download size of {download_size_in_bytes} bytes exceeds limit")

            # Lines must be explicitly decoded since `iter_lines()` returns bytes by default
            # and we cannot use its `decode_unicode=True` mode, since otherwise Unicode newline
            # characters such as `\u0085` (which can appear in test output) are treated the same
            # as `\n` or `\r`, and so split into unwanted additional lines by `iter_lines()`.
            for raw_line in response.iter_lines():
                # Decode once per line rather than once per builder. Using `replace`
                # prevents malformed unicode (which might possibly exist in test
                # message output) from breaking parsing of the rest of the log.
                text = raw_line.decode("utf-8", "replace")
                for builder in self.builders:
                    try:
                        builder.parse_line(text)
                    except EmptyPerformanceDataError:
                        logger.warning("We have parsed an empty PERFHERDER_DATA for %s", self.url)

        # Gather the artifacts from all builders.
        for builder in self.builders:
            # Run end-of-parsing actions for this parser,
            # in case the artifact needs clean-up/summarising.
            builder.finish_parse()
            name = builder.name
            artifact = builder.get_artifact()
            # An empty performance_data payload is not worth storing.
            if name == "performance_data" and not artifact[name]:
                continue
            self.artifacts[name] = artifact