in python/moz/l10n/formats/__init__.py [0:0]
def detect_format(name: str | None, source: bytes | str | None = None) -> Format | None:
    """
    Detect the format of the input based on its file extension
    and/or contents.

    Returns a `Format` enum value, or `None` if the input is not recognized.

    Without `source`, JSON and XML based formats are
    only recognized if they have a distinctive file extension.

    If `source` is bytes that fail to decode during the JSON attempts,
    it is treated as not being JSON and detection moves on to the XML check.
    """
    if not name:
        ext = None
    else:
        _, ext = splitext(name)
        # A distinctive extension settles the format without looking at `source`.
        if ext == ".dtd":
            return Format.dtd
        elif ext == ".ftl":
            return Format.fluent
        elif ext == ".inc":
            return Format.inc
        elif ext == ".ini":
            return Format.ini
        elif ext == ".properties":
            return Format.properties
        elif ext in {".po", ".pot"}:
            return Format.po
        elif ext in {".xlf", ".xliff"}:
            return Format.xliff

    # Everything below inspects the contents.
    if source is None:
        return None

    # Try parsing as JSON first, unless we're pretty sure it's XML
    if ext != ".xml":
        # UnicodeDecodeError is caught together with JSONDecodeError: decoding
        # a bytes `source` (e.g. XML in a legacy encoding) can fail before any
        # JSON parsing happens, and that only means "this is not JSON".
        try:
            data: dict[str, Any] = loads(source)
            if not is_object_of_strings(data):
                return None
            if all(is_webext_message(m) for m in data.values()):
                return Format.webext
            return Format.plain_json
        except (JSONDecodeError, UnicodeDecodeError):
            pass

        # Retry with json_linecomment_loads; input that only this loader
        # accepts is recognized as webext or not at all.
        try:
            data = json_linecomment_loads(source)
            if is_object_of_strings(data) and all(
                is_webext_message(m) for m in data.values()
            ):
                return Format.webext
            return None
        except (JSONDecodeError, UnicodeDecodeError):
            pass

    # Let's presume the input is XML and look at its root node.
    try:
        # Imported lazily: when lxml is unavailable, the XML check is skipped.
        from lxml.etree import LxmlError, iterparse

        bs = source.encode() if isinstance(source, str) else source
        # Only the first "start" event is needed; it carries the root element.
        _, xml_root = next(iterparse(BytesIO(bs), events=("start",)))
        # The `None` key of nsmap holds the root's default namespace, if any.
        ns = xml_root.nsmap.get(None, None)
        if ns:
            return Format.xliff if ns in xliff_ns else None
        return Format.android if xml_root.tag == "resources" else None
    except ImportError:
        pass
    except LxmlError:
        # Must be separate and after ImportError:
        # the name LxmlError is only bound once the import above has succeeded.
        pass
    return None