python/moz/l10n/formats/webext/parse.py (94 lines of code) (raw):
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from re import compile
from typing import Any
from ...model import (
Comment,
Entry,
Expression,
Message,
Pattern,
PatternMessage,
Resource,
Section,
VariableRef,
)
from ...util.loads import json_linecomment_loads
from .. import Format
# Matches the three `$` constructs recognised inside a message string:
#   group 1: the name of a named placeholder written as `$name$`
#   group 2: a positional argument `$1` to `$9` (the `$` is part of the group)
#   group 3: for a run of two or more `$`, every `$` after the first one
placeholder = compile(r"\$([a-zA-Z0-9_@]+)\$|(\$[1-9])|\$(\$+)")
# Matches a positional argument `$1` to `$9`, capturing only its digit.
pos_arg = compile(r"\$([1-9])")
def webext_parse(source: str | bytes) -> Resource[Message]:
    """
    Build a message resource from the contents of a messages.json file.

    Every top-level key of the JSON object becomes one entry,
    identified by that key, in a single section with an empty id.
    The entry value is parsed from the "message" field together with
    the optional "placeholders" field, and the entry comment is taken
    from the "description" field, defaulting to an empty string.

    Named placeholders are represented as declarations,
    with an attribute used for an example, if it's available.

    The parsed resource will not include any metadata.
    """
    data: dict[str, dict[str, Any]] = json_linecomment_loads(source)
    entries: list[Entry[Message] | Comment] = []
    for key, msg in data.items():
        message = webext_parse_message(msg["message"], msg.get("placeholders", None))
        description = msg.get("description", "")
        entries.append(Entry((key,), message, comment=description))
    section = Section((), entries)
    return Resource(Format.webext, [section])
def _append_text(pattern: Pattern, text: str) -> None:
    """Add literal `text` to `pattern`, merging it into a trailing string part."""
    if not text:
        return
    if pattern and isinstance(pattern[-1], str):
        pattern[-1] += text
    else:
        pattern.append(text)


def webext_parse_message(
    source: str, placeholders: dict[str, dict[str, str]] | None
) -> PatternMessage:
    """
    Parse a single messages.json message.

    Named placeholders are represented as declarations,
    with an attribute used for an example, if it's available.

    Placeholder names in `source` are matched case-insensitively
    against the keys of `placeholders`. The `placeholders` mapping and
    the dicts it contains are only read, never modified, so the same
    mapping can be passed to multiple calls with identical results.

    Raises a ValueError if `source` refers to a named placeholder
    that has no corresponding entry in `placeholders`.
    """
    ph_data = (
        {k.lower(): v for k, v in placeholders.items()}
        if placeholders is not None
        else {}
    )
    pattern: Pattern = []
    declarations: dict[str, Expression] = {}
    # Maps a lower-cased placeholder key to the variable name of the
    # declaration already created for it. This is kept locally rather than
    # being written into the placeholder dicts, which belong to the caller.
    ph_names: dict[str, str] = {}
    pos = 0
    for m in placeholder.finditer(source):
        # Literal text between the previous match and this one
        _append_text(pattern, source[pos : m.start()])
        if m[1]:
            # Named placeholder, with content & optional example in placeholders object
            ph_key = m[1].lower()
            ph_name = ph_names.get(ph_key)
            if ph_name is None:
                # First use of this placeholder: create its declaration
                ph = ph_data.get(ph_key)
                if ph is None:
                    raise ValueError(f"Missing placeholders entry for {ph_key}")
                decl_src = ph["content"]
                decl_arg_match = pos_arg.fullmatch(decl_src)
                if decl_arg_match:
                    # The content is exactly one positional argument
                    decl_value = Expression(
                        VariableRef(f"arg{decl_arg_match[1]}"),
                        attributes={"source": decl_src},
                    )
                else:
                    decl_value = Expression(decl_src)
                if "example" in ph:
                    decl_value.attributes["example"] = ph["example"]
                # The name keeps the casing of its first use in the message,
                # with `@` replaced and a leading digit prefixed by `_`.
                ph_name = m[1].replace("@", "_")
                if ph_name[0].isdigit():
                    ph_name = f"_{ph_name}"
                declarations[ph_name] = decl_value
                ph_names[ph_key] = ph_name
            pattern.append(
                Expression(VariableRef(ph_name), attributes={"source": m[0]})
            )
        elif m[2]:
            # Indexed placeholder
            ph_src = m[2]
            pattern.append(
                Expression(
                    VariableRef(f"arg{ph_src[1]}"), attributes={"source": ph_src}
                )
            )
        else:
            # Escaped literal dollar sign: keep every `$` after the first
            _append_text(pattern, m[3])
        pos = m.end()
    # Literal text after the last match
    _append_text(pattern, source[pos:])
    return PatternMessage(pattern, declarations)