"""
Reads all of the kinds and their task descriptions and generates:
docs/training/tasks.md
"""

from dataclasses import dataclass
from typing import Any
import yaml
from pathlib import Path


@dataclass
class TaskEntry:
    """A single documented task taken from a kind.yml file.

    Attributes:
        name_full: The kind name and the task name joined by "-", for example
            "datasets-opus-{src_locale}".
        name_base: ``name_full`` cut off at the first "-{", for example
            "datasets-opus". Equal to ``name_full`` when there is no "-{".
        kind_path: Path of the kind.yml that declares the task, relative to the
            repository root.
        description: The task's description with surrounding whitespace removed.
    """

    # Field order is part of the positional constructor signature; keep it stable.
    name_full: str
    name_base: str
    kind_path: Path
    description: str


def _load_task_entries(root_path: Path, skip_kinds: set[str]) -> dict[str, list[TaskEntry]]:
    """Collect the described tasks of every kind.yml, grouped by task base name.

    The name base would be "datasets-opus" for "datasets-opus-{src_locale}".

    Args:
        root_path: The repository root that contains "taskcluster/kinds".
        skip_kinds: Names of kind directories that are left out of the docs.

    Returns:
        A mapping of task base name to the entries that share that base name.

    Raises:
        ValueError: If a task has neither its own description nor a default one
            from "task-defaults".
    """
    task_entries_by_name_base: dict[str, list[TaskEntry]] = {}

    # Directory listing order is arbitrary, so sort to keep the output stable across runs.
    for kind_path in sorted((root_path / "taskcluster/kinds").glob("*/kind.yml")):
        relative_kind_path = kind_path.relative_to(root_path)
        kind_name = kind_path.parent.name

        if kind_name in skip_kinds:
            print(f"Skipping {kind_path}")
            continue

        print(relative_kind_path)
        with open(kind_path, "r", encoding="utf-8") as f:
            # An empty YAML document loads as None.
            data: dict[str, Any] = yaml.safe_load(f) or {}

        # A key that is present but left empty loads as None, so `or` is used for the
        # fallbacks rather than only the default argument of `.get`.
        default_description = (data.get("task-defaults") or {}).get("description", "")
        tasks: dict[str, Any] = data.get("tasks") or {}

        for task_name, task_data in tasks.items():
            print(f"  - {task_name}")
            description = (task_data or {}).get("description", default_description)
            description = (description or "").strip()
            if not description:
                raise ValueError(
                    f'The task "{task_name}" in {kind_path} did not have a description'
                )
            task_name_full = f"{kind_name}-{task_name}"
            task_name_base = task_name_full.split("-{", 1)[0]

            task_entries_by_name_base.setdefault(task_name_base, []).append(
                TaskEntry(
                    name_full=task_name_full,
                    name_base=task_name_base,
                    kind_path=relative_kind_path,
                    description=description,
                )
            )

    return task_entries_by_name_base


def _render_task_docs(task_entries_by_name_base: dict[str, list[TaskEntry]]) -> str:
    """Render the Markdown page, with one "###" section per task, sorted by base name."""
    parts = [
        "<!-- Do not edit, this file is autogenerated by `task build-docs` -->\n",
        "# Task Descriptions\n\n",
        "This page is auto-generated using all of the descriptions of the tasks "
        "in the kind.yml files. See [Pipeline Steps](pipeline-steps.md) for more "
        "general documentation of the pipeline.\n",
    ]

    for name_base in sorted(task_entries_by_name_base):
        task_entries = task_entries_by_name_base[name_base]
        for task_entry in task_entries:
            # Show the base name of the task unless there are duplicates. In this case
            # show the full name and both descriptions.
            task_name = (
                task_entry.name_base if len(task_entries) == 1 else task_entry.name_full
            )
            # URLs always use forward slashes, regardless of the OS path separator.
            url = (
                "https://github.com/mozilla/translations/blob/main/"
                f"{task_entry.kind_path.as_posix()}"
            )
            # Blank lines surround the heading, and every section ends with a newline so
            # that the file does too.
            parts.append(f"\n### [`{task_name}`]({url})\n\n{task_entry.description}\n")

    return "".join(parts)


def main() -> None:
    """Generate docs/training/task-descriptions.md from the kind.yml task descriptions.

    The repository root is resolved as two directories above this file. The
    "docker-image", "tests" and "inference" kinds are skipped.

    Raises:
        ValueError: If a task in a processed kind has no description.
    """
    print("Generating the kind descriptions documentation.")

    root_path = (Path(__file__).parent / "../../").resolve()
    skip_kinds = {"docker-image", "tests", "inference"}

    # Load in all of the kinds and extract the descriptions.
    task_entries_by_name_base = _load_task_entries(root_path, skip_kinds)

    docs_relative_path = "docs/training/task-descriptions.md"
    print(f"Writing out {docs_relative_path}")
    with open(root_path / docs_relative_path, "w", encoding="utf-8") as f:
        f.write(_render_task_docs(task_entries_by_name_base))

    print("Done generating docs")


# Only generate the docs when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
