utils/tasks/copy-kind-descriptions.py (73 lines of code) (raw):
"""
Reads all of the kinds and their task descriptions and generates:
docs/training/task-descriptions.md
"""
from dataclasses import dataclass
from typing import Any
import yaml
from pathlib import Path
@dataclass
class TaskEntry:
    """The description of a single task, as read from a kind.yml file."""

    # Kind name joined to the task name with "-", e.g. "datasets-opus-{src_locale}".
    name_full: str
    # The full name cut off before the first "-{", e.g. "datasets-opus".
    name_base: str
    # Location of the kind.yml this task came from, relative to the root path.
    kind_path: Path
    # The description text with surrounding whitespace stripped.
    description: str
def _collect_task_entries(
    root_path: Path, skip_kinds: "set[str]"
) -> "dict[str, list[TaskEntry]]":
    """Read every taskcluster/kinds/*/kind.yml and group its tasks by base name.

    The name base would be "datasets-opus" for "datasets-opus-{src_locale}".

    Args:
        root_path: Directory that contains the "taskcluster/kinds" folder.
        skip_kinds: Names of kind directories whose kind.yml is not read.

    Returns:
        A mapping of task base name to the entries that share that base name.

    Raises:
        ValueError: A task has no description and the kind has no default one.
    """
    task_entries_by_name_base: dict[str, list[TaskEntry]] = {}

    # Path.glob yields entries in arbitrary filesystem order. Sort them so the log
    # output and the order of entries sharing a base name are the same on every run.
    for kind_path in sorted((root_path / "taskcluster/kinds").glob("*/kind.yml")):
        relative_kind_path = kind_path.relative_to(root_path)
        kind_name = kind_path.parent.name

        if kind_name in skip_kinds:
            print(f"Skipping {kind_path}")
            continue

        print(relative_kind_path)
        with open(kind_path, "r", encoding="utf-8") as f:
            # An empty YAML document loads as None rather than as a mapping.
            data: dict[str, Any] = yaml.safe_load(f) or {}

        # A key that is present but null comes back as None, so fall back with "or".
        default_description = (data.get("task-defaults") or {}).get("description") or ""
        tasks: dict[str, Any] = data.get("tasks") or {}

        for task_name, task_data in tasks.items():
            print(f" - {task_name}")

            # A null task body or null description is treated as "no description" so
            # that the ValueError below is raised instead of an AttributeError.
            description = (task_data or {}).get("description", default_description)
            description = (description or "").strip()
            if not description:
                raise ValueError(
                    f'The task "{task_name}" in {kind_path} did not have a description'
                )

            task_name_full = f"{kind_name}-{task_name}"
            task_name_base = task_name_full.split("-{", 1)[0]

            task_entries_by_name_base.setdefault(task_name_base, []).append(
                TaskEntry(
                    name_full=task_name_full,
                    name_base=task_name_base,
                    kind_path=relative_kind_path,
                    description=description,
                )
            )

    return task_entries_by_name_base


def _write_task_descriptions(
    docs_path: Path, task_entries_by_name_base: "dict[str, list[TaskEntry]]"
) -> None:
    """Write the Markdown page listing every task, sorted by task base name.

    Args:
        docs_path: The Markdown file to create or overwrite.
        task_entries_by_name_base: Entries grouped by base name.
    """
    with open(docs_path, "w", encoding="utf-8") as f:
        f.write("<!-- Do not edit, this file is autogenerated by `task build-docs` -->\n")
        f.write("# Task Descriptions\n\n")
        f.write(
            "This page is auto-generated using all of the descriptions of the tasks "
            "in the kind.yml files. See [Pipeline Steps](pipeline-steps.md) for more "
            "general documentation of the pipeline.\n\n"
        )

        for name_base in sorted(task_entries_by_name_base):
            task_entries = task_entries_by_name_base[name_base]
            # Show the base name of the task unless there are duplicates. In this case
            # show the full name and both descriptions.
            show_full_name = len(task_entries) > 1

            for task_entry in task_entries:
                task_name = task_entry.name_full if show_full_name else task_entry.name_base
                # The path ends up in a URL, so always use forward slashes.
                url = (
                    "https://github.com/mozilla/translations/blob/main/"
                    f"{task_entry.kind_path.as_posix()}"
                )
                # Every element needs its own comma: adjacent string literals are
                # concatenated, which would silently drop the blank line after the
                # heading. The final "" ends the entry with a newline.
                lines = [
                    "",
                    f"### [`{task_name}`]({url})",
                    "",
                    task_entry.description,
                    "",
                ]
                f.write("\n".join(lines))


def main() -> None:
    """Generate docs/training/task-descriptions.md from the kind.yml descriptions."""
    print("Generating the kind descriptions documentation.")

    # This script sits two directories below the root path.
    root_path = (Path(__file__).parent / "../../").resolve()
    skip_kinds = {"docker-image", "tests", "inference"}

    # Load in all of the kinds and extract the descriptions.
    task_entries_by_name_base = _collect_task_entries(root_path, skip_kinds)

    docs_relative_path = "docs/training/task-descriptions.md"
    print(f"Writing out {docs_relative_path}")
    _write_task_descriptions(root_path / docs_relative_path, task_entries_by_name_base)

    print("Done generating docs")
# Only generate the docs when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()