# python/moz/l10n/formats/fluent/serialize.py (263 lines of code) (raw):
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from collections.abc import Callable
from re import fullmatch
from typing import Any, Iterator
from fluent.syntax import FluentSerializer
from fluent.syntax import ast as ftl
from ...model import (
CatchallKey,
Comment,
Entry,
Expression,
Message,
Metadata,
Pattern,
PatternMessage,
Resource,
Section,
SelectMessage,
VariableRef,
)
def fluent_serialize(
    resource: (Resource[str] | Resource[Message]),
    serialize_metadata: Callable[[Metadata], str | None] | None = None,
    trim_comments: bool = False,
) -> Iterator[str]:
    """
    Generate the contents of a Fluent FTL file for `resource`, piece by piece.

    The resource is first converted with `fluent_astify()`,
    and each entry of the resulting AST body is then serialized and yielded on its own.

    Section identifiers are not supported.
    A message identifier with one part is a message value,
    and one with two parts is a message attribute.
    Function names are upper-cased, and expressions using the `message` function
    become message and term references.

    `serialize_metadata` and `trim_comments` are passed through to `fluent_astify()`;
    see there for how metadata and comments are handled.
    """
    ast_resource = fluent_astify(resource, serialize_metadata, trim_comments)
    ftl_serializer = FluentSerializer()
    for position, ast_entry in enumerate(ast_resource.body):
        # Serializer state argument: 0 for the very first entry, 1 for all later ones.
        yield ftl_serializer.serialize_entry(ast_entry, 1 if position else 0)
def fluent_astify(
    resource: (Resource[str] | Resource[Message]),
    serialize_metadata: Callable[[Metadata], str | None] | None = None,
    trim_comments: bool = False,
) -> ftl.Resource:
    """
    Transform a resource into a corresponding Fluent AST structure.

    Section identifiers are not supported.
    Single-part message identifiers are treated as message values,
    while two-part message identifiers are considered message attributes.
    An attribute entry is attached to the immediately preceding entry with the same
    first identifier part; otherwise a new message or term is started for it.

    Function names are upper-cased, and annotations with the `message` function
    are mapped to message and term references.

    If the resource includes any metadata other than a string resource `info` value,
    a `serialize_metadata` callable must be provided
    to map each field into a comment value, or to discard it by returning an empty value.

    If `trim_comments` is set, no comments (and no metadata) are included in the result.

    Raises ValueError if metadata is encountered without `serialize_metadata`,
    or if a message identifier does not have exactly one or two parts.
    """

    def comment(
        node: (Resource[Any] | Section[Any] | Entry[Any] | Comment),
    ) -> str:
        """Build the comment text for `node`, with its serialized metadata appended."""
        if trim_comments:
            return ""
        cs = node.comment.rstrip()
        if not isinstance(node, Comment) and node.meta:
            if not serialize_metadata:
                raise ValueError("Metadata requires serialize_metadata parameter")
            for field in node.meta:
                # A leading resource `info` field is never rendered as metadata.
                if (
                    isinstance(node, Resource)
                    and field.key == "info"
                    and field == node.meta[0]
                ):
                    continue
                meta_str = serialize_metadata(field)
                if meta_str:
                    ms = meta_str.strip("\n")
                    cs = f"{cs}\n{ms}" if cs else ms
        return cs

    body: list[ftl.EntryType] = []
    res_info = resource.meta[0] if resource.meta else None
    if (
        not trim_comments
        and res_info
        and res_info.key == "info"
        and isinstance(res_info.value, str)
        and res_info.value
    ):
        # A non-empty string `info` becomes a standalone comment at the top of the file.
        # In this case only the plain resource comment is used;
        # no resource metadata is passed to serialize_metadata.
        body.append(ftl.Comment(res_info.value))
        res_comment = resource.comment.rstrip()
    else:
        res_comment = comment(resource)
    if res_comment:
        body.append(ftl.ResourceComment(res_comment))

    for idx, section in enumerate(resource.sections):
        section_comment = comment(section)  # type: ignore[arg-type]
        # Every section after the first gets a group comment, even an empty one,
        # unless comments are trimmed.
        if (not trim_comments and idx != 0) or section_comment:
            body.append(ftl.GroupComment(section_comment))

        # The message or term that following attribute entries may attach to.
        cur: ftl.Message | ftl.Term | None = None
        cur_id = ""
        for entry in section.entries:  # type: ignore[attr-defined]
            if isinstance(entry, Comment):
                if not trim_comments:
                    body.append(ftl.Comment(entry.comment))
                # A standalone comment ends the current message or term.
                cur = None
            else:
                value = fluent_astify_message(entry.value)
                entry_comment = comment(entry)
                if len(entry.id) == 1:  # value
                    cur_id = entry.id[0]
                    cur = (
                        ftl.Term(ftl.Identifier(cur_id[1:]), value)
                        if cur_id.startswith("-")
                        else ftl.Message(ftl.Identifier(cur_id), value)
                    )
                    if entry_comment:
                        cur.comment = ftl.Comment(entry_comment)
                    body.append(cur)
                elif len(entry.id) == 2:  # attribute
                    if cur is None or entry.id[0] != cur_id:
                        # No matching parent entry: start a new message or term.
                        cur_id = entry.id[0]
                        if cur_id.startswith("-"):
                            # A term is always given a value here, so use an empty
                            # string literal as a placeholder. This must not reuse
                            # `value`, which is the attribute's own pattern
                            # and is attached below.
                            term_value = ftl.Pattern(
                                [ftl.Placeable(ftl.StringLiteral(""))]
                            )
                            cur = ftl.Term(ftl.Identifier(cur_id[1:]), term_value)
                        else:
                            cur = ftl.Message(ftl.Identifier(cur_id))
                        if entry_comment:
                            cur.comment = ftl.Comment(entry_comment)
                        body.append(cur)
                    elif entry_comment:
                        # Attributes have no comments of their own, so the comment is
                        # merged into the parent's, labelled with the attribute name.
                        attr_comment = f"{entry.id[1]}:\n{entry_comment}"
                        if cur.comment:
                            cur.comment.content = (
                                str(cur.comment.content) + "\n\n" + attr_comment
                            )
                        else:
                            cur.comment = ftl.Comment(attr_comment)
                    cur.attributes.append(
                        ftl.Attribute(ftl.Identifier(entry.id[1]), value)
                    )
                else:
                    raise ValueError(f"Unsupported message id: {entry.id}")
    return ftl.Resource(body)
def fluent_astify_message(message: str | Message) -> ftl.Pattern:
    """
    Build the Fluent AST pattern corresponding to a message.

    A string becomes a pattern with a single text element,
    and a PatternMessage is converted directly with `flat_pattern()`.
    A SelectMessage is converted into nested select expressions, one level per selector.

    Function names are upper-cased, and expressions using the `message` function
    are mapped to message and term references.

    Raises ValueError for any other kind of message,
    or if the variants of a SelectMessage can not be folded into a single pattern.
    """
    if isinstance(message, str):
        return ftl.Pattern([ftl.TextElement(message)])
    if isinstance(message, PatternMessage):
        return flat_pattern(message.declarations, message.pattern)
    if not isinstance(message, SelectMessage):
        raise ValueError(f"Unsupported message: {message}")

    # Each item pairs the not-yet-consumed keys of a variant with its pattern.
    # The list is folded in place: on each pass, the last key of every item is
    # consumed and adjacent items sharing the same remaining keys are merged into
    # one select expression, until a single `([], pattern)` item is left.
    #
    # This depends on the variants being ordered so that a variant with N keys
    # is adjacent to all other variants with the same first N-1 keys.
    declarations = message.declarations
    pending = [
        (list(variant_keys), flat_pattern(declarations, variant_pattern))
        for variant_keys, variant_pattern in message.variants.items()
    ]
    catchall_name = fallback_name(message)
    first_keys = pending[0][0]  # Emptied by the passes below
    while first_keys:
        selector = value(declarations, message.selectors[len(first_keys) - 1])
        is_plain_number_call = (
            isinstance(selector, ftl.FunctionReference)
            and selector.id.name == "NUMBER"
            and selector.arguments.positional
            and isinstance(selector.arguments.positional[0], ftl.VariableReference)
            and not selector.arguments.named
        )
        if is_plain_number_call:
            # NUMBER($var) without options is selected on as just $var.
            selector = selector.arguments.positional[0]

        group_keys = []
        select = None
        pos = 0
        while pos < len(pending):
            keys, pattern = pending[pos]
            last_key = keys.pop()
            ftl_variant = ftl.Variant(
                variant_key(last_key, catchall_name),
                pattern,
                isinstance(last_key, CatchallKey),
            )
            if select and keys == group_keys:
                # Same remaining keys as the current group: merge into its select,
                # and do not advance, as the next item has moved into this position.
                select.variants.append(ftl_variant)
                del pending[pos]
            else:
                group_keys = keys
                select = ftl.SelectExpression(selector.clone(), [ftl_variant])
                pending[pos] = (keys, ftl.Pattern([ftl.Placeable(select)]))
                pos += 1

    if len(pending) != 1:
        raise ValueError(f"Error resolving select message variants (n={len(pending)})")
    return pending[0][1]
def fallback_name(message: SelectMessage) -> str:
    """
    Find a name for unnamed catch-all keys that no variant key of `message` uses.

    Returns the first of `other`, `other1`, `other2`, ... which differs from every
    plain key and from the value of every catch-all key.
    """
    taken = {
        (k.value if isinstance(k, CatchallKey) else k)
        for keys in message.variants
        for k in keys
    }
    candidate = "other"
    suffix = 0
    while candidate in taken:
        suffix += 1
        candidate = f"other{suffix}"
    return candidate
def variant_key(
    key: str | CatchallKey, other: str
) -> ftl.NumberLiteral | ftl.Identifier:
    """
    Convert a variant key into a Fluent variant key node.

    For a catch-all key, its own value is used if it has one, and `other` otherwise.

    A key is a number literal only if it matches the Fluent number syntax:
    an optional `-`, ASCII digits, and an optional fraction part.
    Python's float() is not used for this check, as it also accepts forms such as
    `1e5`, `1_000`, `+1`, `.5`, `inf` and `nan`, which Fluent does not parse as numbers.

    Raises ValueError if the key is neither a number nor an identifier.
    """
    # Per the parameter type, anything that is not a string is a catch-all key.
    kv = key if isinstance(key, str) else (key.value or other)
    if fullmatch(r"-?[0-9]+(?:\.[0-9]+)?", kv):
        return ftl.NumberLiteral(kv)
    if fullmatch(r"[a-zA-Z][\w-]*", kv):
        return ftl.Identifier(kv)
    raise ValueError(f"Unsupported variant key: {kv}")
def flat_pattern(decl: dict[str, Expression], pattern: Pattern) -> ftl.Pattern:
    """
    Convert a pattern without selectors into a Fluent AST pattern.

    String parts become text elements and Expression parts become placeables,
    with `decl` used to resolve references to declared variables.

    Raises ValueError for any other kind of pattern part.
    """

    def convert(part: Any) -> ftl.TextElement | ftl.Placeable:
        if isinstance(part, str):
            return ftl.TextElement(part)
        if isinstance(part, Expression):
            return ftl.Placeable(expression(decl, part))
        raise ValueError(f"Conversion to Fluent not supported: {part}")

    return ftl.Pattern([convert(part) for part in pattern])
def expression(
    decl: dict[str, Expression], expr: Expression, decl_name: str = ""
) -> ftl.InlineExpression:
    """
    Convert an expression into a Fluent inline expression.

    `decl` holds the message declarations, used to resolve variable references.
    `decl_name` is the name of the declaration that `expr` is the value of, if any;
    it is passed on to `value()` so that a reference to that same name
    is not expanded again.

    With a function, the result is built by `function_ref()`.
    Without one, the result is the converted argument.

    Raises ValueError if the expression has neither a function nor an argument.
    """
    arg = value(decl, expr.arg, decl_name) if expr.arg is not None else None
    if expr.function:
        return function_ref(decl, arg, expr.function, expr.options)
    if arg:
        return arg
    raise ValueError("Invalid empty expression")
def function_ref(
    decl: dict[str, Expression],
    arg: ftl.InlineExpression | None,
    function: str,
    options: dict[str, str | VariableRef],
) -> ftl.InlineExpression:
    """
    Convert a function call into a Fluent inline expression.

    All option values must resolve to literals, and are converted first.
    After that:
    - `string` requires an argument and no options, and results in the argument itself.
    - `number` with a number literal argument and no options results in that literal.
    - `message` requires a literal argument that identifies a message or a term,
      optionally with an attribute, and results in a message or term reference.
      Only term references may have options.
    - Anything else results in a function reference with an upper-cased name.

    Raises ValueError if any of the above requirements is not met.
    """
    named: list[ftl.NamedArgument] = []
    for opt_name, opt_value in options.items():
        literal = value(decl, opt_value)
        if not isinstance(literal, ftl.Literal):
            raise ValueError(
                f"Fluent option value not literal for {opt_name}: {literal}"
            )
        named.append(ftl.NamedArgument(ftl.Identifier(opt_name), literal))

    if function == "string":
        if not arg:
            raise ValueError("Argument required for :string")
        if named:
            raise ValueError("Options on :string are not supported")
        return arg

    if function == "number" and not named and isinstance(arg, ftl.NumberLiteral):
        return arg

    if function == "message":
        if not isinstance(arg, ftl.Literal):
            raise ValueError(
                "Message and term references must have a literal message identifier"
            )
        parsed = fullmatch(r"(-?[a-zA-Z][\w-]*)(?:\.([a-zA-Z][\w-]*))?", arg.value)
        if parsed is None:
            raise ValueError(f"Invalid message or term identifier: {arg.value}")
        target, attr_name = parsed.groups()
        attribute = ftl.Identifier(attr_name) if attr_name else None
        if target.startswith("-"):
            # A leading dash marks a term; it is not a part of the term's identifier.
            call_args = ftl.CallArguments(named=named) if named else None
            return ftl.TermReference(ftl.Identifier(target[1:]), attribute, call_args)
        if named:
            raise ValueError("Options on message references are not supported")
        return ftl.MessageReference(ftl.Identifier(target), attribute)

    positional = [arg] if arg else None
    return ftl.FunctionReference(
        ftl.Identifier(function.upper()), ftl.CallArguments(positional, named)
    )
# Translation table for the contents of a Fluent string literal, for use with str.translate():
# - Non-printable ASCII C0 & C1 / Unicode Cc characters are replaced by \uXXXX escapes.
# - Backslash and double quote are backslash-escaped, as an unescaped `"` would end
#   the literal early and an unescaped `\` would start an escape sequence.
esc_cc = {
    **{n: f"\\u{n:04X}" for r in (range(0, 32), range(127, 160)) for n in r},
    ord("\\"): "\\\\",
    ord('"'): '\\"',
}
def value(
    decl: dict[str, Expression], val: str | VariableRef, decl_name: str = ""
) -> ftl.InlineExpression:
    """
    Convert a literal or a variable reference into a Fluent inline expression.

    A string becomes a number literal only if it matches the Fluent number syntax:
    an optional `-`, ASCII digits, and an optional fraction part.
    Python's float() is not used for this check, as it also accepts forms such as
    `inf`, `nan`, `1e5` and `1_000`. Emitted unquoted, those would be read back
    as a message reference or fail to parse at all.
    Any other string becomes a string literal, escaped with `esc_cc`.

    A reference to a variable declared in `decl` is replaced by the conversion of
    its declared expression, unless its name is `decl_name`, i.e. the declaration
    currently being converted. Any other reference becomes a variable reference.
    """
    if isinstance(val, str):
        if fullmatch(r"-?[0-9]+(?:\.[0-9]+)?", val):
            return ftl.NumberLiteral(val)
        return ftl.StringLiteral(val.translate(esc_cc))
    if val.name != decl_name and val.name in decl:
        return expression(decl, decl[val.name], val.name)
    return ftl.VariableReference(ftl.Identifier(val.name))