in scripts/in_container/run_provider_yaml_files_check.py [0:0]
def check_doc_files(yaml_files: dict[str, dict]) -> tuple[int, int]:
    """Check that doc and logo URLs in provider.yaml files match the docs tree.

    Collects every ``how-to-guide`` URL (from ``integrations`` and
    ``transfers`` entries) and every integration ``logo`` URL configured in
    the given provider.yaml contents, then compares them with the URLs
    derived from the files found under ``AIRFLOW_DOCS_PATH``. Files whose
    path starts with an entry of ``suspended_providers`` (docs) or
    ``suspended_logos`` (logos) are left out of the expected sets.

    On a mismatch, one message is appended to the module-level ``errors``
    list. Both comparisons share a single ``try`` block, so a doc URL
    mismatch is reported without running the logo comparison.

    :param yaml_files: mapping whose values are parsed provider.yaml
        dictionaries; the keys are not used here.
    :return: ``(num_docs, num_errors)`` where ``num_docs`` is the number of
        doc URLs configured in the provider.yaml files and ``num_errors`` is
        1 when a discrepancy was found, otherwise 0.
    """
    num_errors = 0
    current_doc_urls: list[str] = []
    current_logo_urls: list[str] = []
    for provider in yaml_files.values():
        if "integrations" in provider:
            integrations = provider["integrations"]
            # "how-to-guide" on an integration is a list of URLs; flatten it.
            current_doc_urls.extend(
                guide
                for integration in integrations
                if "how-to-guide" in integration
                for guide in integration["how-to-guide"]
            )
            current_logo_urls.extend(
                integration["logo"] for integration in integrations if "logo" in integration
            )
        if "transfers" in provider:
            # "how-to-guide" on a transfer is a single URL, not a list.
            current_doc_urls.extend(
                op["how-to-guide"] for op in provider["transfers"] if "how-to-guide" in op
            )
    # Report how many doc URLs were checked (previously this stayed at 0).
    num_docs = len(current_doc_urls)

    if suspended_providers:
        console.print("[yellow]Suspended/Removed providers:[/]")
        console.print(suspended_providers)
    # str.startswith accepts a tuple of prefixes; build it once, not per file.
    suspended_provider_prefixes = tuple(suspended_providers)

    expected_doc_files = itertools.chain(
        AIRFLOW_DOCS_PATH.glob("apache-airflow-providers-*/operators/**/*.rst"),
        AIRFLOW_DOCS_PATH.glob("apache-airflow-providers-*/transfer/**/*.rst"),
    )
    expected_doc_urls = set()
    for doc_file in expected_doc_files:
        if doc_file.name == "index.rst" or "_partials" in doc_file.parts:
            continue
        relative_path = doc_file.relative_to(AIRFLOW_DOCS_PATH).as_posix()
        if not relative_path.startswith(suspended_provider_prefixes):
            expected_doc_urls.add(f"/docs/{relative_path}")
    # A provider may also keep a single top-level operators.rst guide.
    for doc_file in AIRFLOW_DOCS_PATH.glob("apache-airflow-providers-*/operators.rst"):
        relative_path = doc_file.relative_to(AIRFLOW_DOCS_PATH).as_posix()
        if not relative_path.startswith(suspended_provider_prefixes):
            expected_doc_urls.add(f"/docs/{relative_path}")

    if suspended_logos:
        console.print("[yellow]Suspended logos:[/]")
        console.print(suspended_logos)
        console.print()
    suspended_logo_prefixes = tuple(suspended_logos)

    expected_logo_urls = set()
    for logo_file in (AIRFLOW_DOCS_PATH / "integration-logos").rglob("*"):
        if not logo_file.is_file():
            continue
        logo_url = f"/{logo_file.relative_to(AIRFLOW_DOCS_PATH).as_posix()}"
        if not logo_url.startswith(suspended_logo_prefixes):
            expected_logo_urls.add(logo_url)

    try:
        console.print("Checking document urls")
        assert_sets_equal(
            expected_doc_urls,
            "Document urls found in airflow/docs",
            set(current_doc_urls),
            "Document urls configured in provider.yaml files",
        )
        console.print(f"Checked {len(current_doc_urls)} doc urls")
        console.print()
        console.print("Checking logo urls")
        assert_sets_equal(
            expected_logo_urls,
            "Logo urls found in airflow/docs/integration-logos",
            set(current_logo_urls),
            "Logo urls configured in provider.yaml files",
        )
        console.print(f"Checked {len(current_logo_urls)} logo urls")
        console.print()
    except AssertionError as ex:
        nested_error = textwrap.indent(str(ex), " ")
        errors.append(
            f"Discrepancies between documentation/logos for providers and provider.yaml files "
            f"[yellow]How to fix it[/]: Please synchronize the docs/logos.\n{nested_error}"
        )
        num_errors += 1
    return num_docs, num_errors