python/moz/l10n/formats/properties/serialize.py (99 lines of code) (raw):
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from re import Match, compile
from typing import Any, Callable, Iterator, Literal
from ...model import Entry, Message, PatternMessage, Resource
control_chars = compile(r"[\x00-\x19\x5C\x7F-\x9F]")
not_ascii_printable_chars = compile(r"[^\x20-\x5B\x5D-\x7E]")
special_key_trans = str.maketrans({" ": "\\ ", ":": "\\:", "=": "\\="})
def encode_char(m: Match[str]) -> str:
ch = m.group()
if ch == "\\":
return r"\\"
elif ch == "\t":
return r"\t"
elif ch == "\n":
return r"\n"
elif ch == "\f":
return r"\f"
elif ch == "\r":
return r"\r"
return f"\\u{ord(ch):04x}"
def escape_chars(value: str, ensure_ascii: bool) -> str:
return (
control_chars.sub(encode_char, value)
if not ensure_ascii
else not_ascii_printable_chars.sub(encode_char, value)
)
def fix_outer_spaces(value: str) -> str:
if value[0:1].isspace():
value = "\\" + value
if value.endswith(" ") and not value.endswith("\\ "):
value = value[:-1] + "\\u0020"
return value
def properties_serialize(
resource: Resource[str] | Resource[Message],
encoding: Literal["iso-8859-1", "utf-8", "utf-16"] = "utf-8",
serialize_message: Callable[[Message], str] | None = None,
trim_comments: bool = False,
) -> Iterator[str]:
"""
Serialize a resource as the contents of a .properties file.
Section identifiers will be prepended to their constituent message identifiers.
Multi-part message identifiers will be joined with `.` between each part.
For non-string message values, a `serialize_message` callable must be provided.
Metadata is not supported.
Yields each entry, comment, and empty line separately.
Re-parsing a serialized .properties file is not guaranteed to result in the same Resource,
as the serialization may lose information about message sections and metadata.
"""
ensure_ascii = encoding == "iso-8859-1"
at_empty_line = True
def comment(comment: str, meta: Any, standalone: bool) -> Iterator[str]:
nonlocal at_empty_line
if trim_comments:
return
if meta:
raise ValueError("Metadata is not supported")
if comment:
if standalone and not at_empty_line:
yield "\n"
for line in comment.strip("\n").split("\n"):
if not line or line.isspace():
yield "#\n"
else:
line = line.rstrip() + "\n"
yield f"#{line}" if line.startswith("#") else f"# {line}"
if standalone:
yield "\n"
at_empty_line = True
yield from comment(resource.comment, resource.meta, True)
for section in resource.sections:
yield from comment(section.comment, section.meta, True)
id_prefix = ".".join(section.id) + "." if section.id else ""
for entry in section.entries:
if isinstance(entry, Entry):
yield from comment(entry.comment, entry.meta, False)
key = id_prefix + ".".join(entry.id)
key = escape_chars(key, ensure_ascii)
key = key.translate(special_key_trans)
value: str
msg = entry.value
try:
if isinstance(msg, str):
value = msg
elif serialize_message:
value = serialize_message(msg)
value = escape_chars(value, ensure_ascii)
else:
value = properties_serialize_message(
msg, ensure_ascii=ensure_ascii
)
except Exception as err:
raise ValueError(f"Error serializing {key}") from err
yield f"{key} = {fix_outer_spaces(value)}\n" if value else f"{key} =\n"
at_empty_line = False
else:
yield from comment(entry.comment, None, True)
def properties_serialize_message(msg: Message, *, ensure_ascii: bool = False) -> str:
"""
Serialize a message value in its .properties representation.
If `ensure_ascii` is set, all non-ASCII characters will be escaped.
Non-string pattern parts must have a string `source` attribute.
"""
if not isinstance(msg, PatternMessage):
raise ValueError(f"Unsupported message: {msg}")
msgstr = ""
for part in msg.pattern:
if isinstance(part, str):
msgstr += part
else:
partsrc = part.attributes.get("source", None)
if not isinstance(partsrc, str):
raise ValueError(f"Unsupported message part: {part}")
msgstr += partsrc
return fix_outer_spaces(escape_chars(msgstr, ensure_ascii))