# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from collections import defaultdict
from collections.abc import Iterator
from re import compile
from typing import List, Union, cast

from lxml import etree

from ...model import (
    Comment,
    Entry,
    Expression,
    Message,
    Metadata,
    PatternMessage,
    Resource,
    Section,
)
from .. import Format
from .common import (
    attrib_as_metadata,
    element_as_metadata,
    pretty_name,
    xcode_tool_id,
    xliff_ns,
)
from .parse_trans_unit import parse_pattern, parse_trans_unit
from .parse_xcode import parse_xliff_stringsdict


def xliff_parse(source: str | bytes) -> Resource[Message]:
    """
    Parse an XLIFF 1.2 file into a message resource.

    Sections identify files and groups within them,
    with the first identifier part parsed as the <file> "original" attribute,
    and later parts as <group> "id" attributes.

    An entry's value represents the <target> of a <trans-unit>,
    and its comment the first <note>.
    Other elements and attributes are represented by metadata.

    Metadata keys encode XML element data, using XPath expressions as keys.

    Documents declaring version 1.0, 1.1, or 1.2 are accepted.

    Raises ValueError if the version, default namespace, or root element
    is not supported, if a <file> lacks an "original" attribute,
    has no <body> or more than one <body>,
    if a <file> or <body> contains an unsupported element,
    or if non-whitespace text is found between elements.
    """
    root = etree.fromstring(source.encode() if isinstance(source, str) else source)
    version = root.attrib.get("version", None)
    if version not in ("1.0", "1.1", "1.2"):
        raise ValueError(f"Unsupported <xliff> version: {version}")
    ns = root.nsmap.get(None, "")
    if ns:
        if ns in xliff_ns:
            # Use the Clark notation prefix that lxml puts on qualified tag names.
            ns = f"{{{ns}}}"
        else:
            raise ValueError(f"Unsupported namespace: {ns}")

    if root.tag != f"{ns}xliff":
        raise ValueError(f"Unsupported root node: {root}")
    if root.text and not root.text.isspace():
        raise ValueError(f"Unexpected text in <xliff>: {root.text}")

    res: Resource[Message] = Resource(Format.xliff, [])
    # Comments before the root element are iterated nearest-first,
    # so they are reversed to restore document order.
    root_comments = [
        c.text for c in root.itersiblings(etree.Comment, preceding=True) if c.text
    ]
    if root_comments:
        root_comments.reverse()
        res.comment = comment_str(root_comments)
    res.meta = attrib_as_metadata(root)
    for key, uri in root.nsmap.items():
        res.meta.append(Metadata(f"@xmlns:{key}" if key else "@xmlns", uri))

    # Comments between <file> elements are collected here,
    # and attached to the section of the next <file>.
    comment: list[str] = []
    for file in root:
        if file.tail and not file.tail.isspace():
            raise ValueError(f"Unexpected text in <xliff>: {file.tail}")
        if isinstance(file, etree._Comment):
            comment.append(file.text)
        elif file.tag == f"{ns}file":
            file_name = file.attrib.get("original", None)
            if file_name is None:
                raise ValueError(f'Missing "original" attribute for <file>: {file}')
            meta = attrib_as_metadata(file, None, ("original",))
            entries: list[Entry[Message] | Comment] = []
            body = None
            for child in file:
                if isinstance(child, etree._Comment):
                    entries.append(Comment(comment_str(child.text)))
                elif child.tag == f"{ns}header":
                    meta += element_as_metadata(child, "header", True)
                elif child.tag == f"{ns}body":
                    # An lxml element with no children is falsy,
                    # so an explicit None test is needed to also catch
                    # a duplicate <body> that follows an empty one.
                    if body is not None:
                        raise ValueError(f"Duplicate <body> in <file>: {file}")
                    body = child
                else:
                    raise ValueError(
                        f"Unsupported <{child.tag}> element in <file>: {file}"
                    )
                if child.tail and not child.tail.isspace():
                    raise ValueError(f"Unexpected text in <file>: {child.tail}")

            # The section is added before its body is parsed;
            # `entries` is filled in place below.
            section = Section((file_name,), entries, meta=meta)
            if comment:
                section.comment = comment_str(comment)
                comment.clear()
            res.sections.append(section)

            if body is None:
                raise ValueError(f"Missing <body> in <file>: {file}")
            elif body.text and not body.text.isspace():
                raise ValueError(f"Unexpected text in <body>: {body.text}")

            is_xcode = xcode_tool_id in meta
            if is_xcode and file_name.endswith(".stringsdict"):
                plural_entries = parse_xliff_stringsdict(ns, body)
                if plural_entries is not None:
                    entries += cast(
                        List[Union[Entry[Message], Comment]], plural_entries
                    )
                    # The stringsdict parser handled the whole body.
                    continue

            for unit in body:
                if isinstance(unit, etree._Comment):
                    entries.append(Comment(comment_str(unit.text)))
                elif unit.tag == f"{ns}trans-unit":
                    entries.append(parse_trans_unit(unit, is_xcode))
                elif unit.tag == f"{ns}bin-unit":
                    entries.append(parse_bin_unit(unit))
                elif unit.tag == f"{ns}group":
                    res.sections += parse_group(ns, [file_name], unit, is_xcode)
                else:
                    raise ValueError(
                        f"Unsupported <{unit.tag}> element in <body>: {body}"
                    )
                if unit.tail and not unit.tail.isspace():
                    raise ValueError(f"Unexpected text in <body>: {unit.tail}")
    return res


def xliff_parse_message(source: str, *, is_xcode: bool = False) -> PatternMessage:
    """
    Parse the contents of an XLIFF 1.2 <target> into a message.

    The `source` is wrapped in a <target> element before being parsed as XML,
    using a parser that does not resolve entities.

    Set `is_xcode=True` to parse XCode-style printf strings as variable references.
    """
    xml_parser = etree.XMLParser(resolve_entities=False)
    wrapped = f"<target>{source}</target>"
    target = etree.fromstring(wrapped, xml_parser)
    pattern = list(parse_pattern(target, is_xcode))
    return PatternMessage(pattern)


def parse_group(
    ns: str, parent: list[str], group: etree._Element, is_xcode: bool
) -> Iterator[Section[Message]]:
    """
    Yield a section for a <group>, followed by sections for its nested groups.

    The section identifier is the `parent` path extended with the group's "id"
    attribute, or with an empty string if the group has no "id".
    Comments, <trans-unit> and <bin-unit> children become section entries,
    while other child elements are added to the section metadata.

    Raises ValueError if non-whitespace text is found directly in the group.
    """
    group_path = [*parent, group.attrib.get("id", "")]
    meta = attrib_as_metadata(group, None, ("id",))
    items: list[Entry[Message] | Comment] = []
    if group.text and not group.text.isspace():
        raise ValueError(f"Unexpected text in <group>: {group.text}")

    # The section is emitted before its children are parsed,
    # so that it is ordered before the sections of any nested groups.
    # Its entries and metadata are therefore filled in place afterwards.
    yield Section(tuple(group_path), items, meta=meta)

    trans_unit_tag = f"{ns}trans-unit"
    bin_unit_tag = f"{ns}bin-unit"
    group_tag = f"{ns}group"

    # Counts how many times each other child element name has been seen,
    # so that repeated names get an index suffix in their metadata key.
    occurrences: dict[str, int] = defaultdict(int)
    for child in group:
        if isinstance(child, etree._Comment):
            items.append(Comment(comment_str(child.text)))
        else:
            tag = child.tag
            if tag == trans_unit_tag:
                items.append(parse_trans_unit(child, is_xcode))
            elif tag == bin_unit_tag:
                items.append(parse_bin_unit(child))
            elif tag == group_tag:
                yield from parse_group(ns, group_path, child, is_xcode)
            else:
                name = pretty_name(child, tag)
                occurrences[name] += 1
                count = occurrences[name]
                key = name if count == 1 else f"{name}[{count}]"
                meta += element_as_metadata(child, key, True)
        if child.tail and not child.tail.isspace():
            raise ValueError(f"Unexpected text in <group>: {child.tail}")


def parse_bin_unit(unit: etree._Element) -> Entry[Message]:
    """
    Represent a <bin-unit> as an entry identified by its "id" attribute.

    The entry's message is a single expression with no argument,
    marked with a "bin-unit" attribute.
    The unit's attributes and contents are passed to the metadata helpers;
    the "id" is handed to `attrib_as_metadata` separately
    (presumably to exclude it, as it is already the entry identifier).

    Raises ValueError if the unit has no "id" attribute.
    """
    unit_id = unit.attrib.get("id")
    if unit_id is None:
        raise ValueError(f'Missing "id" attribute for <bin-unit>: {unit}')

    meta = attrib_as_metadata(unit, None, ("id",))
    meta += element_as_metadata(unit, "", False)

    placeholder = Expression(None, attributes={"bin-unit": True})
    return Entry((unit_id,), PatternMessage([placeholder]), meta=meta)


# Matches a multi-line comment body in which every line after the first
# starts with a "   - " prefix, and which starts and ends with a space.
dash_indent = compile(r" .+(\n   - .*)+ ")


def comment_str(body: list[str] | str) -> str:
    """
    Normalise the text of one or more XML comments into a single string.

    A single string is treated as a list of one comment.
    Empty comments are skipped.
    Each remaining comment is cleaned up separately,
    and the results are joined with blank lines between them.
    Leading and trailing newlines are removed from the result.
    """
    comments = [body] if isinstance(body, str) else body
    paragraphs: list[str] = []
    for text in comments:
        if not text:
            continue
        if dash_indent.fullmatch(text) is not None:
            # A dash is considered as a part of the indent if it's aligned
            # with the last dash of <!-- in a top-level comment.
            cleaned = text.replace("\n   - ", "\n").strip(" ")
        else:
            # Otherwise, trim whitespace around each line,
            # and drop newlines at the start and end.
            trimmed = [line.strip() for line in text.splitlines()]
            cleaned = "\n".join(trimmed).strip("\n")
        paragraphs.append(cleaned)
    return "\n\n".join(paragraphs).strip("\n")
