scripts/validate_sphinx.py (60 lines of code) (raw):
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
import argparse
import os
import pkgutil
import re
from typing import Set
# Module-level configuration for the validator.
# Location of the Sphinx .rst sources; joined onto the top-level botorch
# directory that is passed in on the command line.
SPHINX_RST_PATH = os.path.join("sphinx", "source")
# Directory name of the botorch library package inside the top-level directory.
BOTORCH_LIBRARY_PATH = "botorch"
# Regex for the automodule directive used in Sphinx docs. Group 1 captures the
# dotted module name that follows `.. automodule:: `. Because the character
# class is quantified with `*`, the captured name may be the empty string.
AUTOMODULE_REGEX = re.compile(r"\.\. automodule:: ([\.\w]*)")
# Names of top-level botorch modules that are skipped by the validation.
EXCLUDED_MODULES = {"version"}
def parse_rst(rst_filename: str) -> Set[str]:
    """Extract the module names referenced by automodule directives in an rst.

    Each line is searched for ``AUTOMODULE_REGEX``; only the first directive
    found on a line is considered. A directive with no module name after it is
    ignored.

    Args:
        rst_filename: Path to the .rst file to scan.

    Returns:
        The set of dotted module names that appear in `automodule::` directives.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    automodules = set()
    # Read as UTF-8 explicitly so the result does not depend on the locale of
    # the machine running the check.
    with open(rst_filename, encoding="utf-8") as f:
        # Stream the file line by line; there is no need to hold it in memory.
        for line in f:
            match = AUTOMODULE_REGEX.search(line)
            # The pattern allows an empty capture (a directive with no name);
            # such a match carries no module and is skipped.
            if match and match.group(1):
                automodules.add(match.group(1))
    return automodules
def validate_complete_sphinx(path_to_botorch: str) -> None:
    """Validate that Sphinx-based API documentation is complete.

    - Every top-level module (e.g., acquisition, models, etc.) should have a
      corresponding .rst Sphinx source file in sphinx/source.
    - Every single non-package (i.e. py file) module should be included in an
      .rst file `automodule::` directive. Sphinx will then automatically
      include all members from the module in the documentation.

    Note: this function does not validate any documentation, only its presence.

    Args:
        path_to_botorch: the path to the top-level botorch directory (directory
            that includes botorch library, sphinx, website, etc.).

    Raises:
        RuntimeError: If no top-level modules are found under the library
            directory, if a top-level module has no corresponding .rst file,
            or if a non-package module is not referenced by an `automodule::`
            directive in the .rst file of its top-level module.
    """
    # Resolve the library against the supplied root, not the current working
    # directory, so the check behaves the same wherever it is launched from.
    library_path = os.path.join(path_to_botorch, BOTORCH_LIBRARY_PATH)

    # Load top-level modules used in botorch (e.g., acquisition, models),
    # excluding auxiliary ones. `iter_modules` only lists the directory, so
    # no unrelated top-level package that happens to share a name (e.g. an
    # installed `utils`) gets imported and walked by accident.
    modules = {
        info.name
        for info in pkgutil.iter_modules([library_path])
        if info.name not in EXCLUDED_MODULES
    }
    # An empty result means the path is wrong; succeeding here would report
    # complete documentation without having checked anything.
    if not modules:
        raise RuntimeError(f"No modules found in {library_path}")

    # Load all rst files (these contain the documentation for Sphinx).
    # Strip only the trailing extension; other occurrences of ".rst" in a
    # file name are part of the name.
    rstpath = os.path.join(path_to_botorch, SPHINX_RST_PATH)
    rsts = {
        os.path.splitext(fname)[0]
        for fname in os.listdir(rstpath)
        if fname.endswith(".rst")
    }

    # Verify that all top-level modules have a corresponding rst
    missing_rsts = modules.difference(rsts)
    if missing_rsts:
        raise RuntimeError(f"Not all modules have corresponding rst: {missing_rsts}")

    # Track all modules that are not in docs (so can print all)
    modules_not_in_docs = []

    # Iterate over top-level modules. Every module has an rst at this point;
    # sorting keeps the final report in a stable order.
    for module in sorted(modules):
        # Parse rst & extract all modules referenced by an automodule directive
        modules_in_rst = parse_rst(os.path.join(rstpath, module + ".rst"))

        # Extract all non-package modules. To descend into a subpackage,
        # `walk_packages` imports it by its prefixed name; import failures are
        # passed to `onerror` and ignored, so that subpackage is skipped.
        for _importer, modname, ispkg in pkgutil.walk_packages(
            path=[os.path.join(library_path, module)],
            prefix="botorch." + module + ".",
            onerror=lambda x: None,
        ):
            if not ispkg and ".tests" not in modname and modname not in modules_in_rst:
                modules_not_in_docs.append(modname)

    if modules_not_in_docs:
        raise RuntimeError(f"Not all modules are documented: {modules_not_in_docs}")
if __name__ == "__main__":
    # Command-line entry point: one required option naming the repository
    # root, which is handed straight to the validator.
    cli = argparse.ArgumentParser(
        description="Validate that Sphinx documentation is complete."
    )
    path_option = {
        "metavar": "path",
        "required": True,
        "help": "Path to the top-level botorch directory.",
    }
    cli.add_argument("-p", "--path", **path_option)
    validate_complete_sphinx(cli.parse_args().path)