def xliff_parse()

in python/moz/l10n/formats/xliff/parse.py [0:0]


def xliff_parse(source: str | bytes) -> Resource[Message]:
    """
    Parse an XLIFF 1.x (1.0, 1.1 or 1.2) file into a message resource.

    Sections identify files and groups within them,
    with the first identifier part parsed as the <file> "original" attribute,
    and later parts as <group> "id" attributes.

    An entry's value represents the <target> of a <trans-unit>,
    and its comment the first <note>.
    Other elements and attributes are represented by metadata.

    Metadata keys encode XML element data, using XPath expressions as keys.

    XML comments preceding the root element become the resource comment,
    comments directly inside <xliff> are attached to the next <file> section,
    and comments inside <file> or <body> become standalone Comment entries.

    Raises:
        ValueError: If the <xliff> version or default namespace is unsupported,
            the root element is not <xliff>, a <file> has no "original"
            attribute or no <body>, a <file> has more than one <body>,
            an unsupported element is found in a <file> or <body>,
            or non-whitespace text is found between elements.
    """
    root = etree.fromstring(source.encode() if isinstance(source, str) else source)
    version = root.attrib.get("version", None)
    if version not in ("1.0", "1.1", "1.2"):
        raise ValueError(f"Unsupported <xliff> version: {version}")

    # Turn the default namespace (if any) into the "{uri}" prefix
    # that lxml uses in qualified tag names.
    ns = root.nsmap.get(None, "")
    if ns:
        if ns in xliff_ns:
            ns = f"{{{ns}}}"
        else:
            raise ValueError(f"Unsupported namespace: {ns}")

    if root.tag != f"{ns}xliff":
        raise ValueError(f"Unsupported root node: {root}")
    if root.text and not root.text.isspace():
        raise ValueError(f"Unexpected text in <xliff>: {root.text}")

    res: Resource[Message] = Resource(Format.xliff, [])
    root_comments = [
        c.text for c in root.itersiblings(etree.Comment, preceding=True) if c.text
    ]
    if root_comments:
        # Preceding siblings are iterated backwards from the root,
        # so restore document order before joining.
        root_comments.reverse()
        res.comment = comment_str(root_comments)
    res.meta = attrib_as_metadata(root)
    for key, uri in root.nsmap.items():
        res.meta.append(Metadata(f"@xmlns:{key}" if key else "@xmlns", uri))

    # Comments seen directly under <xliff>, pending attachment to the next <file>.
    comment: list[str] = []
    for file in root:
        if file.tail and not file.tail.isspace():
            raise ValueError(f"Unexpected text in <xliff>: {file.tail}")
        if isinstance(file, etree._Comment):
            comment.append(file.text)
        elif file.tag == f"{ns}file":
            file_name = file.attrib.get("original", None)
            if file_name is None:
                raise ValueError(f'Missing "original" attribute for <file>: {file}')
            meta = attrib_as_metadata(file, None, ("original",))
            entries: list[Entry[Message] | Comment] = []
            body = None
            for child in file:
                if isinstance(child, etree._Comment):
                    entries.append(Comment(comment_str(child.text)))
                elif child.tag == f"{ns}header":
                    meta += element_as_metadata(child, "header", True)
                elif child.tag == f"{ns}body":
                    # Compare with None rather than testing truthiness:
                    # an lxml element without children is falsy, so `if body:`
                    # would let a second <body> silently replace an empty one.
                    if body is not None:
                        raise ValueError(f"Duplicate <body> in <file>: {file}")
                    body = child
                else:
                    raise ValueError(
                        f"Unsupported <{child.tag}> element in <file>: {file}"
                    )
                if child.tail and not child.tail.isspace():
                    raise ValueError(f"Unexpected text in <file>: {child.tail}")

            section = Section((file_name,), entries, meta=meta)
            if comment:
                section.comment = comment_str(comment)
                comment.clear()
            res.sections.append(section)

            if body is None:
                raise ValueError(f"Missing <body> in <file>: {file}")
            elif body.text and not body.text.isspace():
                raise ValueError(f"Unexpected text in <body>: {body.text}")

            is_xcode = xcode_tool_id in meta
            if is_xcode and file_name.endswith(".stringsdict"):
                plural_entries = parse_xliff_stringsdict(ns, body)
                # A None result falls through to the generic <body> parsing below.
                if plural_entries is not None:
                    entries += cast(
                        List[Union[Entry[Message], Comment]], plural_entries
                    )
                    continue

            for unit in body:
                if isinstance(unit, etree._Comment):
                    entries.append(Comment(comment_str(unit.text)))
                elif unit.tag == f"{ns}trans-unit":
                    entries.append(parse_trans_unit(unit, is_xcode))
                elif unit.tag == f"{ns}bin-unit":
                    entries.append(parse_bin_unit(unit))
                elif unit.tag == f"{ns}group":
                    # Groups are appended as their own sections after the
                    # <file> section, identified by the file name plus group ids.
                    res.sections += parse_group(ns, [file_name], unit, is_xcode)
                else:
                    raise ValueError(
                        f"Unsupported <{unit.tag}> element in <body>: {body}"
                    )
                if unit.tail and not unit.tail.isspace():
                    raise ValueError(f"Unexpected text in <body>: {unit.tail}")
    return res