# update-toc.py
# This script is intended for use in intermediate doc repos generated from docs.ms CI.
# Given a reference ToC and a set of namespaces, limit the reference to ToC entries that contain
# namespaces in our set.
import argparse
import os
import fnmatch
import re
import json
import sys
import xml.etree.ElementTree as ET

# by default, yaml does not maintain the insertion order of dicts.
# given that this script is intended to generate TABLE OF CONTENTS values,
# maintaining this order is important.
# The drop-in replacement oyaml is a handy solution for us.
import oyaml as yaml

MONIKER_REPLACEMENTS = ["{moniker}", "<moniker>"]
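# these placeholder tokens can appear in the reference ToC's href values; amend_href swaps them
# for the selected moniker folder (an illustrative value would be "latest", passed via --moniker)
# before checking whether the referenced readme exists on disk.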


class PathResolver:
    def __init__(self, doc_repo_location=None, moniker=""):
        self.excluded_href_paths = []
        self.target_moniker = moniker
        self.doc_repo_location = doc_repo_location

        if self.doc_repo_location:
            self.excluded_href_paths = self.get_non_standard_hrefs(self.doc_repo_location)

    # the doc builds have the capability to reference readmes from external repos (they resolve during publishing).
    # this means that we can't simply check the href values for existence. If an href STARTS with one of the
    # "dependent repositories" then we should leave it exactly as is.
    # amend_href is the core of the logic for handling referenced files and ensures that we cannot refer to the
    # same readme twice from two different reference ymls.
    def amend_href(self, toc_dict):
        if not self.doc_repo_location:
            return toc_dict

        input_string = toc_dict["href"]

        # if this is an external readme, we should not attempt to resolve the file; just return with no changes
        if any(input_string.startswith(href) for href in self.excluded_href_paths):
            return toc_dict

        # create a resolvable path to the readme on disk, without any of the docs.ms specificity
        resolvable_path = os.path.normpath(os.path.join(self.doc_repo_location, input_string.replace("~/", "")))

        # apply moniker folder adjustments if necessary
        if self.target_moniker is not None:
            for replacement in MONIKER_REPLACEMENTS:
                # input_string keeps the leading ~/ necessary for docs. update the moniker folder if it exists
                input_string = input_string.replace(replacement, self.target_moniker)

                # the resolvable path differs from input_string in that it can actually be resolved on disk.
                # update it with the moniker folder so we can test for existence of the file
                resolvable_path = resolvable_path.replace(replacement, self.target_moniker)

        possible_target_readme = os.path.splitext(resolvable_path)[0] + ".md"

        if os.path.exists(possible_target_readme):
            toc_dict["href"] = input_string
        else:
            toc_dict.pop("href")
            toc_dict["landingPageType"] = "Service"

        return toc_dict
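
    # illustrative example (href and moniker values are hypothetical): an entry whose href is
    #   ~/docs-ref-services/{moniker}/storage.md
    # with --moniker "latest" is checked on disk as <docrepo>/docs-ref-services/latest/storage.md;
    # if that readme exists, the href keeps its leading ~/ with the moniker filled in,
    # otherwise the href is removed and the entry becomes a "Service" landing page.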

    # the doc builds have the capability to reference readmes from external repos (they resolve during publishing).
    # this means that we can't simply check the href values for existence. If an href STARTS with one of the
    # "dependent repositories" then we should leave it exactly as is. This function returns those starting paths.
    def get_non_standard_hrefs(self, doc_repo_location):
        excluded_href_paths = []
        target = os.path.join(doc_repo_location, ".openpublishing.publish.config.json")

        with open(target, "r") as f:
            data = json.load(f)
            for dependent_repo in data["dependent_repositories"]:
                excluded_href_paths.append("~/{}".format(dependent_repo["path_to_root"]))

        return excluded_href_paths
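
    # illustrative .openpublishing.publish.config.json fragment (the repository value is hypothetical):
    #   {"dependent_repositories": [{"path_to_root": "azure-docs-sdk-python"}]}
    # would exclude any href beginning with "~/azure-docs-sdk-python" from local resolution.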


def filter_children(targeted_ns_list, known_namespaces):
    amended_list = []

    for ns in targeted_ns_list:
        # we may also need to handle the case where the namespace entry is a pattern,
        # azure-eventhubs* for instance (see the sketch below this function)
        if ns in known_namespaces:
            amended_list.append(ns)

    return amended_list
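
# a minimal sketch of the pattern handling mentioned above (an assumption about intent, not wired in):
# fnmatch is already imported, so a glob-aware membership test could look like
#   any(fnmatch.fnmatch(known, ns) for known in known_namespaces)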


# a post-order recursive function that returns a modified reference.yml
# based on the set of namespaces that we've grabbed from the autogenerated ToC.yml
def filter_toc(toc_dict, namespaces, path_resolver):
    if toc_dict is None:
        return None

    # internal node
    if "items" in toc_dict:
        # recurse as many times as necessary
        item_list = []
        for item in toc_dict["items"]:
            result_n = filter_toc(item, namespaces, path_resolver)

            # only append the result if we know it exists
            if result_n:
                item_list.append(result_n)

        if item_list:
            toc_dict["items"] = item_list
        else:
            return None

    # handle href
    if "href" in toc_dict:
        toc_dict = path_resolver.amend_href(toc_dict)

    # leaf node
    if "children" in toc_dict:
        filtered_children = filter_children(toc_dict["children"], namespaces)

        # if we filter out all the children, this node should simply cease to exist
        if not filtered_children:
            return None
    elif "href" not in toc_dict and "items" not in toc_dict:
        return None

    return toc_dict
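
# illustrative example (names are hypothetical): a reference entry such as
#   {"name": "Event Hubs", "children": ["azure-eventhub", "azure-mgmt-eventhub"]}
# survives if at least one listed child appears in the autogenerated namespace set;
# an entry whose children and nested items are all filtered away is removed entirely.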


def grep_children_namespaces(autogenerated_toc_xml):
    # note: this reads the xml named by the module-level args rather than the element passed in;
    # the first root child is skipped and only <Namespace> elements contribute a Name.
    return [ns.attrib["Name"] for ns in ET.parse(args.namespaces).getroot()[1:] if ns.tag == "Namespace"] + ["**"]
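
# illustrative return value (namespace names are hypothetical):
#   ["azure-core", "azure-storage-blob", "**"]
# the trailing "**" presumably keeps catch-all glob entries in the reference ToC from being filtered out.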


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
        Combines a reference and target ToC. The new target ToC mirrors the reference, omitting ToC
        entries that are NOT present in the preview output.
        """
    )

    parser.add_argument("-r", "--reference", help="The source ToC.yml", required=True)

    parser.add_argument("-t", "--target", help="The target ToC.yml", required=True)

    parser.add_argument(
        "-n",
        "--namespaces",
        help="The autogenerated ToC xml that lists the namespaces present in the target documentation",
        required=True,
    )

    parser.add_argument(
        "-d",
        "--docrepo",
        help="The root directory of the target documentation repository.",
        required=True,
    )

    parser.add_argument(
        "-m",
        "--moniker",
        help="Selected moniker. Used when filling in moniker-folder path updates.",
        default="",
        required=False,
    )

    args = parser.parse_args()
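
    # illustrative invocation (paths and moniker value are hypothetical):
    #   python update-toc.py -r reference/toc.yml -t out/toc.yml \
    #       -n docs-ref-autogen/toc.xml -d /path/to/doc-repo -m latest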

    try:
        target_autogenerated_toc = ET.parse(args.namespaces).getroot()[0]
    except Exception:
        print(
            "Execution requires the namespaces xml to be parseable. Please check that the target xml has assembly tags."
        )
        sys.exit(1)

    try:
        with open(args.reference, "r") as reference_yml:
            base_reference_toc = yaml.safe_load(reference_yml)
    except Exception:
        print(
            "Execution requires the reference yml to be readable."
        )
        sys.exit(1)

    present_in_target = grep_children_namespaces(target_autogenerated_toc)

    print(
        "Here are the visible namespaces in the target autogenerated ToC. Constraining reference.yml."
    )
    for ns in sorted(present_in_target):
        print(" |__ " + ns)

    path_resolver = PathResolver(doc_repo_location=args.docrepo, moniker=args.moniker)

    base_reference_toc[0] = filter_toc(base_reference_toc[0], present_in_target, path_resolver)

    updated_content = yaml.dump(base_reference_toc, default_flow_style=False)

    with open(args.target, "w") as f:
        f.write(updated_content)