python/moz/l10n/formats/dtd/serialize.py (66 lines of code) (raw):
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from collections.abc import Iterator
from re import UNICODE, compile
from typing import Any
from ...model import Entry, Message, PatternMessage, Resource
from .parse import name, re_comment
# Matcher for a complete DTD entity name, built from the parser's `name` pattern;
# the serializer applies it with `fullmatch()` to reject unsupported names.
re_name = compile(name, flags=UNICODE)
def dtd_serialize(
    resource: Resource[str] | Resource[Message],
    trim_comments: bool = False,
) -> Iterator[str]:
    """
    Serialize a resource as the contents of a DTD file.

    Section identifiers will be prepended to their constituent message identifiers.
    Multi-part identifiers will be joined with `.` between each part.

    Metadata is not supported.
    If `trim_comments` is set, no comments are included in the output,
    and metadata is skipped without being checked.

    Yields each entity, comment, and empty line separately.

    Re-parsing a serialized DTD file is not guaranteed to result in the same Resource,
    as the serialization may lose information about message sections and metadata.

    Raises a ValueError if metadata is encountered while comments are being emitted,
    if a joined identifier is not a valid DTD name,
    or if a message value is neither a string nor a PatternMessage
    consisting only of string parts.
    """

    # True until an entity has been yielded, and again after each standalone
    # comment (which is always followed by an empty line).
    at_empty_line = True

    def emit_comment(text: str, meta: Any, standalone: bool) -> Iterator[str]:
        """
        Yield `text` as a DTD comment.

        Standalone comments are separated from surrounding content by empty lines;
        non-standalone ones are expected to be directly followed by their entity.
        """
        nonlocal at_empty_line
        if trim_comments:
            return
        if meta:
            raise ValueError("Metadata is not supported")
        if not text:
            return

        if standalone and not at_empty_line:
            yield "\n"

        # Comments can't include --, so add a zero width space between and after dashes beyond the first
        lines = [
            line.rstrip().replace("--", "-\u200b-\u200b")
            for line in text.strip("\n").split("\n")
        ]
        first = lines[0]
        # Ensure exactly one space after the opening marker,
        # unless the first line already provides its own leading space or is empty.
        parts = ["<!--" if not first or first.startswith(" ") else "<!-- ", first]
        for line in lines[1:]:
            parts.append("\n")
            # Lines have been right-stripped, so a non-empty line always has content.
            if line:
                if not line.startswith(" "):
                    parts.append(" ")
                parts.append(line)
        parts.append(" -->\n")
        yield "".join(parts)

        if standalone:
            yield "\n"
            at_empty_line = True

    yield from emit_comment(resource.comment, resource.meta, True)
    for section in resource.sections:
        yield from emit_comment(section.comment, section.meta, True)
        id_prefix = ".".join(section.id) + "." if section.id else ""
        for entry in section.entries:
            if not isinstance(entry, Entry):
                # Anything that is not an Entry only contributes its comment,
                # which is serialized as a standalone comment.
                yield from emit_comment(entry.comment, None, True)
                continue

            yield from emit_comment(entry.comment, entry.meta, False)

            # Named `entity_name` to avoid shadowing the `name` pattern imported from the parser.
            entity_name = id_prefix + ".".join(entry.id)
            if not re_name.fullmatch(entity_name):
                raise ValueError(f"Unsupported DTD name: {entity_name}")

            msg = entry.value
            if isinstance(msg, str):
                value = msg
            elif isinstance(msg, PatternMessage) and all(
                isinstance(p, str) for p in msg.pattern
            ):
                value = "".join(msg.pattern)  # type: ignore[arg-type]
            else:
                raise ValueError(f"Unsupported message for {entity_name}: {msg}")

            if '"' in value and "'" not in value:
                # Single quotes can wrap the value as-is, with no escaping needed.
                quoted = f"'{value}'"
            else:
                # Double quotes inside a double-quoted entity value must be
                # written as the predefined &quot; entity reference.
                escaped = value.replace('"', "&quot;")
                quoted = f'"{escaped}"'

            # NOTE(review): `re_comment` is imported from the parser and presumably
            # matches `<!-- ... -->`; any match is dropped from the serialized value.
            quoted = re_comment.sub("", quoted)
            yield f"<!ENTITY {entity_name} {quoted}>\n"
            at_empty_line = False