in python/moz/l10n/formats/xliff/parse.py [0:0]
def xliff_parse(source: str | bytes) -> Resource[Message]:
    """
    Parse an XLIFF 1.2 file into a message resource.

    Sections identify files and groups within them,
    with the first identifier part parsed as the <file> "original" attribute,
    and later parts as <group> "id" attributes.

    An entry's value represents the <target> of a <trans-unit>,
    and its comment the first <note>.
    Other elements and attributes are represented by metadata.

    Metadata keys encode XML element data, using XPath expressions as keys.

    Args:
        source: The XLIFF document, as text or as encoded bytes.
            A `str` is encoded with `str.encode()` before being parsed.

    Returns:
        A resource with one section per <file>,
        followed by the sections produced for any <group> in its <body>.

    Raises:
        ValueError: If the <xliff> version or default namespace is unsupported,
            the root element is not <xliff>,
            non-whitespace text is found directly in <xliff>, <file>
            or <body>, a <file> has no "original" attribute,
            a <file> has no <body> or more than one,
            or an unsupported element is found in a <file> or <body>.

    Errors raised by `etree.fromstring()` for malformed XML are not caught here.
    """
    root = etree.fromstring(source.encode() if isinstance(source, str) else source)
    version = root.attrib.get("version", None)
    if version not in ("1.0", "1.1", "1.2"):
        raise ValueError(f"Unsupported <xliff> version: {version}")

    # Turn a supported default namespace into the "{uri}" tag prefix
    # that lxml uses in element tag names; no namespace leaves it empty.
    ns = root.nsmap.get(None, "")
    if ns:
        if ns in xliff_ns:
            ns = f"{{{ns}}}"
        else:
            raise ValueError(f"Unsupported namespace: {ns}")

    if root.tag != f"{ns}xliff":
        raise ValueError(f"Unsupported root node: {root}")
    if root.text and not root.text.isspace():
        raise ValueError(f"Unexpected text in <xliff>: {root.text}")

    res: Resource[Message] = Resource(Format.xliff, [])

    # Comments before the root element become the resource comment.
    # They are collected walking backwards from the root,
    # so the list is reversed to restore document order.
    root_comments = [
        c.text for c in root.itersiblings(etree.Comment, preceding=True) if c.text
    ]
    if root_comments:
        root_comments.reverse()
        res.comment = comment_str(root_comments)

    res.meta = attrib_as_metadata(root)
    for key, uri in root.nsmap.items():
        res.meta.append(Metadata(f"@xmlns:{key}" if key else "@xmlns", uri))

    # Comments directly under <xliff> are buffered here and attached to
    # the next <file> section. Any left over after the last <file> are unused.
    comment: list[str] = []
    for file in root:
        if file.tail and not file.tail.isspace():
            raise ValueError(f"Unexpected text in <xliff>: {file.tail}")
        if isinstance(file, etree._Comment):
            comment.append(file.text)
        elif file.tag == f"{ns}file":
            file_name = file.attrib.get("original", None)
            if file_name is None:
                raise ValueError(f'Missing "original" attribute for <file>: {file}')
            meta = attrib_as_metadata(file, None, ("original",))
            entries: list[Entry[Message] | Comment] = []
            body = None
            for child in file:
                if isinstance(child, etree._Comment):
                    entries.append(Comment(comment_str(child.text)))
                elif child.tag == f"{ns}header":
                    meta += element_as_metadata(child, "header", True)
                elif child.tag == f"{ns}body":
                    # Compare against None rather than truth-testing:
                    # an lxml element without children is falsy, so `if body:`
                    # would miss a duplicate following an empty <body/>.
                    if body is not None:
                        raise ValueError(f"Duplicate <body> in <file>: {file}")
                    body = child
                else:
                    raise ValueError(
                        f"Unsupported <{child.tag}> element in <file>: {file}"
                    )
                if child.tail and not child.tail.isspace():
                    raise ValueError(f"Unexpected text in <file>: {child.tail}")

            # The section is registered before its body is parsed, so that it
            # precedes any <group> sections added below. It is given the
            # `entries` list itself, which continues to be filled in afterwards;
            # this relies on Section keeping a reference to that list.
            section = Section((file_name,), entries, meta=meta)
            if comment:
                section.comment = comment_str(comment)
                comment.clear()
            res.sections.append(section)

            if body is None:
                raise ValueError(f"Missing <body> in <file>: {file}")
            elif body.text and not body.text.isspace():
                raise ValueError(f"Unexpected text in <body>: {body.text}")

            is_xcode = xcode_tool_id in meta
            if is_xcode and file_name.endswith(".stringsdict"):
                plural_entries = parse_xliff_stringsdict(ns, body)
                # A None result falls through to the generic unit parsing below.
                if plural_entries is not None:
                    entries += cast(
                        List[Union[Entry[Message], Comment]], plural_entries
                    )
                    continue

            for unit in body:
                if isinstance(unit, etree._Comment):
                    entries.append(Comment(comment_str(unit.text)))
                elif unit.tag == f"{ns}trans-unit":
                    entries.append(parse_trans_unit(unit, is_xcode))
                elif unit.tag == f"{ns}bin-unit":
                    entries.append(parse_bin_unit(unit))
                elif unit.tag == f"{ns}group":
                    res.sections += parse_group(ns, [file_name], unit, is_xcode)
                else:
                    raise ValueError(
                        f"Unsupported <{unit.tag}> element in <body>: {body}"
                    )
                if unit.tail and not unit.tail.isspace():
                    raise ValueError(f"Unexpected text in <body>: {unit.tail}")

    return res