scripts/generate_api_docs.py (195 lines of code) (raw):
# Copyright (c) Meta Platforms, Inc
"""
Generates MDX (Markdown + JSX, see https://mdxjs.com/) files and sidebar
information for the Docusaurus v2 website from the library components'
docstrings.
We have chosen to take this approach to integrate our API documentation
with Docusaurus because there is no pre-existing robust solution to use
Sphinx output with Docusaurus.
This script will be run by the "documentation" GitHub workflow on pushes
and pull requests to the main branch. It will function correctly from
any working directory.
"""
import errno
import importlib
import inspect
import os
import re
from inspect import isclass, isfunction, ismodule
from typing import Any
import toml
from flowtorch.docs import (
generate_class_markdown,
generate_function_markdown,
generate_module_markdown,
sparse_module_hierarchy,
walk_packages,
)
def module_sidebar(mod_name, items):
    """
    Render one collapsed sidebar category as a JavaScript object literal.

    `mod_name` becomes the category label and `items` is an iterable of
    already-formatted sidebar entries (strings), which are joined with ", "
    inside the `items: [...]` array.
    """
    entries = ", ".join(items)
    return "".join(
        (
            "{\n type: 'category',\n",
            f" label: '{mod_name}',\n",
            # Categories are always emitted collapsed
            " collapsed: true,",
            f"items: [{entries}],\n",
            "}",
        )
    )
def fullname(key, item):
    """
    Return the dotted name obtained by appending `item` to `key`.
    """
    return ".".join((key, item))
def dfs(dict):
    """
    Walk a nested module hierarchy depth-first and build sidebar categories.

    `dict` maps a module name to a nested mapping of the same shape. For every
    key, the module-level `modules_and_symbols` and `symbol_to_article`
    mappings (assigned in the `__main__` section of this script) are consulted
    to list the module's own article followed by one article per symbol; the
    categories of nested modules are appended after those entries.

    Returns a list with one `module_sidebar` string per key of `dict`.
    """
    categories = []
    for mod_name, children in dict.items():
        symbols = modules_and_symbols[mod_name][1]
        entries = []
        if len(symbols) > 0:
            # The module's own article comes first, then its symbols
            entries.append(f'"api/{symbol_to_article[mod_name]}"')
            entries.extend(
                f'"api/{symbol_to_article[fullname(mod_name, symbol)]}"'
                for symbol, _ in symbols
            )
        if children != {}:
            # Nested modules become nested categories
            entries += dfs(children)
        categories.append(module_sidebar(mod_name, entries))
    return categories
# Generate article markdown files
def generate_markdown(article_name: str, symbol_name: str, entity: Any) -> str:
    """
    Build the MDX source of a single API article.

    Apart from the overview article, this function reads the module-level
    names `config`, `main_path` and `symbol_to_article`, which are assigned in
    the `__main__` section of this script.

    Args:
        article_name: Document id written into the article's front matter.
        symbol_name: Dotted name of the documented symbol. The empty string
            selects the special overview article for the entire API.
        entity: The module, class or function to document (unused for the
            overview article).

    Returns:
        The front matter followed by the MDX body, joined by newlines.

    Raises:
        ValueError: If `entity` is not a class, module or function.

    TODO: Method that inputs an object, extracts signature/docstring,
    and formats as markdown
    TODO: Method that build index markdown for overview files
    """
    # The overview for the entire API is a special case
    if symbol_name == "":
        return """---
id: overview
sidebar_label: "Overview"
slug: "/api"
---
:::info
These API stubs are generated from Python via a custom script and will filled
out in the future.
:::
"""

    # Pick the renderer first so that an unsupported entity is reported with a
    # clear error instead of failing later while its source file is looked up.
    if isclass(entity):
        render = generate_class_markdown
    elif ismodule(entity):
        render = generate_module_markdown
    elif isfunction(entity):
        render = generate_function_markdown
    else:
        raise ValueError(f"Symbol {symbol_name} has unknown type {type(entity)}")

    # Regular modules/classes/functions
    sidebar_label = "Overview" if ismodule(entity) else symbol_name.split(".")[-1]
    header = f"""---
id: {article_name}
sidebar_label: {sidebar_label}
---"""

    # Imports for custom styling components
    markdown = [
        """import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
import { faAngleDoubleRight } from '@fortawesome/free-solid-svg-icons'
import PythonClass from "@theme/PythonClass";
import PythonFunction from "@theme/PythonFunction";
import PythonMethod from "@theme/PythonMethod";
import PythonModule from "@theme/PythonModule";
import PythonNavbar from "@theme/PythonNavbar";
"""
    ]

    # Make URL: the path of the defining file relative to the main package,
    # appended to the repository address from the configuration
    entity_file = (
        entity.__file__ if ismodule(entity) else inspect.getmodule(entity).__file__
    )
    url = (
        config["settings"]["github"]
        + "flowtorch/"
        + entity_file[(len(main_path) + 1) :].replace("\\", "/")
    )

    # Make navigation bar: every ancestor that has its own article becomes a
    # link, the symbol itself is emphasised, anything else is plain text
    markdown.append(f"<PythonNavbar url='{url}'>\n")
    parts = symbol_name.split(".")
    last_idx = len(parts) - 1
    navigation = []
    for idx, part in enumerate(parts):
        partial_symbol_name = ".".join(parts[: idx + 1])
        if idx == last_idx:
            navigation.append(f"*{part}*")
        elif partial_symbol_name in symbol_to_article:
            navigation.append(
                f"[{part}](/api/{symbol_to_article[partial_symbol_name]})"
            )
        else:
            navigation.append(part)
    markdown.append(
        ' <FontAwesomeIcon icon={faAngleDoubleRight} size="sm" /> '.join(navigation)
    )
    markdown.append("\n</PythonNavbar>\n")

    # Convert symbol to MDX
    markdown.append(render(symbol_name, entity))
    return "\n".join([header] + markdown)
def search_symbols(config):
    """
    Collect the modules and symbols to document and apply the configured
    include/exclude filters.

    Args:
        config: Parsed configuration. `config["settings"]["search"]` must be a
            module name or a list of module names. The optional
            `config["filters"]` maps "include"/"exclude" to
            `{"modules": pattern, "symbols": pattern}`, where a pattern is a
            regular expression or a list of alternatives. Patterns must match
            the whole dotted name.

    Returns:
        A dict mapping each selected module name to a 2-tuple: the first
        element of the `walk_packages` entry, passed through untouched, and
        the list of `(symbol_name, symbol)` pairs that passed the symbol
        filters.

    Raises:
        ValueError: If the "search" setting is missing or is neither a string
            nor a list.
    """
    # Validate module name to document. An explicit exception is used instead
    # of `assert` so the check still runs under `python -O`.
    if (
        "settings" not in config
        or "search" not in config["settings"]
        or not isinstance(config["settings"]["search"], (str, list))
    ):
        raise ValueError(
            'configuration must define settings.search as a string or a list'
        )
    # TODO: Try to import module, more validation, etc.

    # Default include/exclude rules: include everything, exclude nothing
    patterns = {
        "include": {"modules": re.compile(r".+"), "symbols": re.compile(r".+")},
        "exclude": {"modules": re.compile(r""), "symbols": re.compile(r"")},
    }

    # Override rules based on configuration file
    if "filters" in config:
        for clude, rules in config["filters"].items():
            for rule, pattern in rules.items():
                if isinstance(pattern, list):
                    pattern = "|".join(pattern)
                patterns[clude][rule] = re.compile(pattern)

    # Read in all modules and symbols
    search = config["settings"]["search"]
    search = [search] if isinstance(search, str) else search
    modules_and_symbols = {}
    # `dict.fromkeys` removes duplicates while keeping the configured order, so
    # the result does not depend on string hash randomisation as `set` would.
    for modname in dict.fromkeys(search):
        modules_and_symbols.update(walk_packages(modname))

    def _selected(kind, name):
        # A name is kept when it matches the include rule and not the exclude one
        return (
            patterns["include"][kind].fullmatch(name) is not None
            and patterns["exclude"][kind].fullmatch(name) is None
        )

    # Apply filtering
    # TODO: Would be slightly faster if we applied module filtering inside
    # walk_packages
    filtered = {}
    for mod_name, entry in modules_and_symbols.items():
        if not _selected("modules", mod_name):
            continue
        kept_symbols = [
            (symbol_name, symbol)
            for symbol_name, symbol in entry[1]
            if _selected("symbols", mod_name + "." + symbol_name)
        ]
        filtered[mod_name] = (entry[0], kept_symbols)
    return filtered
def construct_article_list(modules_and_symbols):
    """
    Assign a unique, lower-case article name to every module and symbol.

    Article names are lower-cased because web servers and Windows machines do
    not reliably distinguish addresses by case; names that collide after
    lower-casing are disambiguated with a numeric suffix. Modules without any
    symbols get no article.

    Returns a pair `(articles, symbol_to_article)`: the first maps an article
    name to `(full symbol name, object)`, the second maps a full symbol name
    to its article name.
    """
    articles = {}
    symbol_to_article = {}

    for mod_name, (module, symbols) in modules_and_symbols.items():
        if not len(symbols):
            continue

        # Article for the module itself
        slug = mod_name.lower()
        if slug in articles:
            number = 1
            while f"{slug}{number}" in articles:
                number += 1
            slug = f"{slug}{number}"
        articles[slug] = (mod_name, module)
        symbol_to_article[mod_name] = slug

        # Articles for the module's symbols; the counter keeps running across
        # all collisions within one module
        counter = 0
        for symbol_name, symbol in symbols:
            qualified = f"{mod_name}.{symbol_name}"
            slug = qualified.lower()
            if slug in articles:
                counter += 1
                while f"{slug}{counter}" in articles:
                    counter += 1
                slug = f"{slug}{counter}"
            articles[slug] = (qualified, symbol)
            symbol_to_article[qualified] = slug

    return articles, symbol_to_article
if __name__ == "__main__":
    # NOTE: `config`, `modules_and_symbols`, `symbol_to_article` and
    # `main_path` are intentionally module-level names; `dfs` and
    # `generate_markdown` read them as globals.

    # Load and validate configuration file
    import flowtorch

    config_path = os.path.join(flowtorch.__path__[0], "../website/documentation.toml")
    config = toml.load(config_path)
    modules_and_symbols = search_symbols(config)
    articles, symbol_to_article = construct_article_list(modules_and_symbols)

    # Build hierarchy of modules for the sidebar
    hierarchy = sparse_module_hierarchy(modules_and_symbols.keys())

    # Output locations are relative to the first module listed under "search"
    search = config["settings"]["search"]
    search = [search] if isinstance(search, str) else search
    main_module = importlib.import_module(search[0])
    main_path = main_module.__path__[0]
    sidebar_path = os.path.join(main_path, config["paths"]["sidebar"])
    markdown_path = os.path.join(main_path, config["paths"]["markdown"])

    def create_paths(path: str) -> None:
        """Create `path` and any missing parents; an existing directory is fine."""
        os.makedirs(path, exist_ok=True)

    # Create directories if they don't exist
    create_paths(sidebar_path)
    create_paths(markdown_path)

    # Generate sidebar. The encoding is explicit so the output does not depend
    # on the platform's default.
    sidebar_file = os.path.join(sidebar_path, config["paths"]["sidebar_filename"])
    with open(sidebar_file, "w", encoding="utf-8") as file:
        print("module.exports = [\n'api/overview',", file=file)
        print(",".join(dfs(hierarchy)), file=file)
        print("];", file=file)

    # For each article, convert the symbols to markdown, etc.
    # TODO: How to handle when there is a symbol called overview?
    # Maybe add a key for None instead of "overview"?
    articles["overview"] = ("", None)
    for article_name, (symbol_name, symbol_object) in articles.items():
        article_file = os.path.join(markdown_path, article_name + ".mdx")
        with open(article_file, "w", encoding="utf-8") as file:
            print(
                generate_markdown(article_name, symbol_name, symbol_object), file=file
            )