def parse_marian_context()

in tracking/translations_parser/parser.py [0:0]


    def parse_marian_context(self, logs_iter: Iterator[tuple[list[tuple[str]], str]]) -> None:
        """
        Read the Marian context found at the beginning of a training log.

        Entries are consumed from `logs_iter` and the following attributes are set:
          - `version`, `version_hash` and `release_date`, read from the first entry
            carrying a `[marian]` header;
          - `description`, the text of the entries located between that entry and the
            first `[config]` entry, joined with spaces;
          - `config["marian"]`, the consecutive `[config]` entries parsed as YAML
            (left untouched, with an error logged, when the YAML cannot be parsed);
          - `config` is finally updated with the result of `get_extra_marian_config()`.

        Nothing is returned. The first entry following the configuration dump is
        consumed from the iterator and dropped. A log that ends during the
        configuration dump is tolerated: whatever was collected is still parsed.

        Raises ValueError when no `[marian]` entry exists, or when the version line
        does not hold enough fields or a numeric `major.minor` version.
        """
        headers: list[tuple[str]] = []
        text = ""
        # Consume first lines until we get the Marian header
        for headers, text in logs_iter:
            if ("marian",) in headers:
                break
            logger.debug(f"Marian header not found in: headers={headers} text={text.strip()}")
        else:
            # Iterator exhausted (or empty) without meeting a [marian] header
            raise ValueError("Could not find a [marian] entry in the training log.")

        logger.debug(f"Reading Marian version from text={text.strip()}")
        # Fields are positional: the first token is skipped, then version, hash and date
        _, version, self.version_hash, self.release_date, *_ = text.split()
        version = version.rstrip(";")
        major, minor = map(int, version.lstrip("v").split(".")[:2])
        self.version = f"{major}.{minor}"
        logger.info(f"Detected Marian version {self.version}")
        if (major, minor) not in SUPPORTED_MARIAN_VERSIONS:
            # Distinct names so the detected version above is not shadowed when reading
            versions = ", ".join(
                f"{known_major}.{known_minor}"
                for known_major, known_minor in SUPPORTED_MARIAN_VERSIONS
            )
            logger.warning(
                f"Parsing logs from a non supported Marian version {major}.{minor} "
                f"(supported versions: {versions})."
            )

        logger.debug("Reading Marian run description.")
        desc = []
        for headers, text in logs_iter:
            # Marian headers stops when dumping the configuration
            if ("config",) in headers:
                break
            desc.append(text)
        self.description = " ".join(desc)

        # Try to parse all following config lines as YAML
        logger.debug("Reading Marian configuration.")
        config_lines: list[str] = []
        while ("config",) in headers:
            # Marian incorrectly logs some messages with [config] prefix.
            end_of_dump = (
                "Model is being created" in text or "Loaded model has been created" in text
            )
            if not end_of_dump:
                config_lines.append(text)
            # A default is passed to next() so that a log ending here does not leak
            # StopIteration and skip the YAML parsing below.
            entry = next(logs_iter, None)
            if entry is None:
                break
            headers, text = entry
            if end_of_dump:
                break
        config_yaml = "".join(f"{line}\n" for line in config_lines)
        try:
            self.config["marian"] = yaml.safe_load(config_yaml)
        except Exception as e:
            # Best effort: a broken configuration dump must not abort the parsing
            logger.error(f"Impossible to parse Marian config YAML: {e}")

        # Try to read required extra configuration files when running online from Taskcluster
        self.config.update(self.get_extra_marian_config())