python/moz/l10n/formats/po/parse.py (57 lines of code) (raw):
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from polib import pofile
from ...model import (
CatchallKey,
Comment,
Entry,
Expression,
Message,
Metadata,
PatternMessage,
Resource,
Section,
SelectMessage,
VariableRef,
)
from .. import Format
def po_parse(source: str | bytes) -> Resource[Message]:
    """
    Parse a .po or .pot file into a message resource.

    `source` may be given as `str` or `bytes`; bytes are decoded with
    `bytes.decode()` defaults before being handed to `polib.pofile()`.
    NOTE(review): polib documents that a string argument may also be
    treated as a file path -- confirm callers only pass file contents.

    The returned resource has the PO header comment as its comment,
    the PO header fields as its metadata (values stripped of surrounding
    whitespace), and a single section with an empty id holding all entries.

    Message identifiers may have one or two parts,
    with the second one holding the optional message context.

    Messages may include the following metadata:
    - `translator-comments`
    - `extracted-comments`
    - `reference`: `f"{file}:{line}"`, separately for each reference
    - `obsolete`: `"true"`
    - `flag`: separately for each flag
    - `plural`: the `msgid_plural` of the entry

    Entries with plural translations are represented as a `SelectMessage`
    selecting on the variable `n`, declared as a `number` expression.
    Its variants are keyed by the plural index as a string, for every index
    from 0 up to the highest one present; the highest index is the catch-all
    variant, and any index missing from the entry gets an empty pattern.
    All other entries are represented as a `PatternMessage`.
    """
    pf = pofile(source if isinstance(source, str) else source.decode())
    res_comment = pf.header.lstrip("\n").rstrip()
    res_meta: list[Metadata] = [
        Metadata(key, value.strip()) for key, value in pf.metadata.items()
    ]

    entries: list[Entry[Message] | Comment] = []
    for pe in pf:
        # The order of the metadata entries is kept stable:
        # comments, references, obsolete marker, flags, plural source.
        meta: list[Metadata] = []
        if pe.tcomment:
            meta.append(Metadata("translator-comments", pe.tcomment))
        if pe.comment:
            meta.append(Metadata("extracted-comments", pe.comment))
        meta.extend(
            Metadata("reference", f"{path}:{lineno}")
            for path, lineno in pe.occurrences
        )
        if pe.obsolete:
            meta.append(Metadata("obsolete", "true"))
        meta.extend(Metadata("flag", flag) for flag in pe.flags)
        if pe.msgid_plural:
            meta.append(Metadata("plural", pe.msgid_plural))

        message: Message
        if pe.msgstr_plural:
            plurals = pe.msgstr_plural
            # The highest plural index present acts as the catch-all variant.
            max_idx = max(plurals)
            sel = Expression(VariableRef("n"), "number")
            message = SelectMessage(
                declarations={"n": sel},
                selectors=(VariableRef("n"),),
                variants={
                    # Each variant key is a one-element tuple,
                    # matching the single selector.
                    (str(idx) if idx < max_idx else CatchallKey(str(idx)),): (
                        # Gaps in the index sequence get an empty pattern.
                        [plurals[idx]] if idx in plurals else []
                    )
                    for idx in range(max_idx + 1)
                },
            )
        else:
            message = PatternMessage([pe.msgstr])

        # The message context, if any, is the second part of the identifier.
        entry_id = (pe.msgid, pe.msgctxt) if pe.msgctxt else (pe.msgid,)
        entries.append(Entry(entry_id, message, meta=meta))

    return Resource(Format.po, [Section((), entries)], res_comment, res_meta)