python/moz/l10n/formats/xliff/parse_trans_unit.py (76 lines of code) (raw):

# Copyright Mozilla Foundation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import annotations from collections import defaultdict from collections.abc import Iterator from lxml import etree from ...model import ( Entry, Expression, Markup, Message, Metadata, PatternMessage, VariableRef, ) from .common import attrib_as_metadata, element_as_metadata, pretty_name, xliff_ns from .parse_xcode import parse_xcode_pattern def parse_trans_unit(unit: etree._Element, is_xcode: bool) -> Entry[Message]: id = unit.attrib.get("id", None) if id is None: raise ValueError(f'Missing "id" attribute for <trans-unit>: {unit}') meta = attrib_as_metadata(unit, None, ("id",)) if unit.text and not unit.text.isspace(): raise ValueError(f"Unexpected text in <trans-unit>: {unit.text}") target = None note = None seen: dict[str, int] = defaultdict(int) for el in unit: if isinstance(el, etree._Comment): meta.append(Metadata("comment()", el.text)) else: name = pretty_name(el, el.tag) if name == "target": if target: raise ValueError(f"Duplicate <target> in <trans-unit> {id}: {unit}") target = el meta += attrib_as_metadata(el, "target") elif name == "note" and note is None and el.text: note = el note_attrib = attrib_as_metadata(el, "note") if note_attrib: meta += note_attrib elif el != unit[-1]: # If there are elements after this <note>, # add a marker for its relative position. meta.append(Metadata("note", "")) seen[name] += 1 else: idx = seen[name] + 1 base = f"{name}[{idx}]" if idx > 1 else name meta += element_as_metadata(el, base, True) seen[name] = idx if el.tail and not el.tail.isspace(): raise ValueError(f"Unexpected text in <trans-unit>: {el.tail}") comment = "" if note is None else note.text or "" msg = PatternMessage( [] if target is None else list(parse_pattern(target, is_xcode)) ) return Entry((id,), msg, comment, meta) def parse_pattern( el: etree._Element, is_xcode: bool ) -> Iterator[str | Expression | Markup]: if el.text: if is_xcode: yield from parse_xcode_pattern(el.text) else: yield el.text for child in el: q = etree.QName(child.tag) ns = q.namespace name = q.localname if not ns or ns in xliff_ns else q.text options: dict[str, str | VariableRef] = dict(child.attrib) if name in ("x", "bx", "ex"): yield Markup("standalone", name, options) elif isinstance(child.tag, str): yield Markup("open", name, options) yield from parse_pattern(child, is_xcode) yield Markup("close", name) if child.tail: yield child.tail