update-toc.py

# This script is intended for use in intermediate doc repos generated from docs.ms CI.
# Given a reference ToC and a set of namespaces, limit the reference to ToC entries that contain
# namespaces in our set.
import argparse
import os
import fnmatch
import re
import json
import xml.etree.ElementTree as ET

# by default, yaml does not maintain insertion order of the dicts
# given that this is intended to generate TABLE OF CONTENTS values,
# maintaining this order is important.
# The drop-in replacement oyaml is a handy solution for us.
import oyaml as yaml

MONIKER_REPLACEMENTS = ['{moniker}', '<moniker>']


class PathResolver:
    def __init__(self, doc_repo_location=None, moniker=""):
        self.excluded_href_paths = []
        self.target_moniker = moniker
        self.doc_repo_location = doc_repo_location

        if self.doc_repo_location:
            self.excluded_href_paths = self.get_non_standard_hrefs(self.doc_repo_location)

    # the doc builds have the capability to reference readmes from external repos (they resolve during publishing).
    # this means that we can't simply check the href values for existence. If an href STARTS with one of the
    # "dependent repositories" then we should leave it exactly as is.
    # amend_href is the core of the logic for handling referenced files and ensures that we cannot refer to the same readme twice
    # from two different reference ymls
    def amend_href(self, toc_dict):
        if not self.doc_repo_location:
            return toc_dict

        input_string = toc_dict["href"]

        # if this is an external readme, we should not attempt to resolve the file to a different one, just return with no changes
        if any([input_string.startswith(href) for href in self.excluded_href_paths]):
            return toc_dict

        # create a resolvable path to the readme on disk, without any of the docs ms specificity
        resolvable_path = os.path.normpath(os.path.join(self.doc_repo_location, input_string.replace("~/", "")))

        # apply moniker folder adjustments if necessary
        if self.target_moniker is not None:
            for replacement in MONIKER_REPLACEMENTS:
                # input string maintains the leading ~/ necessary for docs. update the moniker folder if it exists
                input_string = input_string.replace(replacement, self.target_moniker)

                # the resolvable path is different from the input_string in that it is actually resolvable on disk.
                # update it with the moniker folder so we can test for existence of the file
                resolvable_path = resolvable_path.replace(replacement, self.target_moniker)

        possible_target_readme = os.path.splitext(resolvable_path)[0] + ".md"

        if os.path.exists(possible_target_readme):
            toc_dict["href"] = input_string
        else:
            # no readme on disk: drop the href and fall back to the generated service landing page
            toc_dict.pop("href")
            toc_dict["landingPageType"] = "Service"

        return toc_dict

    # the doc builds have the capability to reference readmes from external repos (they resolve during publishing).
    # this means that we can't simply check the href values for existence. If an href STARTS with one of the
    # "dependent repositories" then we should leave it exactly as is.
    # This function returns the start paths.
    def get_non_standard_hrefs(self, doc_repo_location):
        excluded_href_paths = []
        target = os.path.join(doc_repo_location, ".openpublishing.publish.config.json")

        with open(target, "r") as f:
            data = json.load(f)

        for dependent_repo in data["dependent_repositories"]:
            excluded_href_paths.append("~/{}".format(dependent_repo["path_to_root"]))

        return excluded_href_paths


def filter_children(targeted_ns_list, known_namespaces):
    amended_list = []
    for ns in targeted_ns_list:
        # also need to handle when the namespace grep is a pattern
        # azure-eventhubs* <-- for instance
        if ns in known_namespaces:
            amended_list.append(ns)
    return amended_list


# a post-order recursive function that returns a modified reference.yml
# based on the set of namespaces that we've grabbed from the autogenerated ToC.yml
def filter_toc(toc_dict, namespaces, path_resolver):
    if toc_dict is None:
        return None

    # internal node
    if "items" in toc_dict:
        # recurse as many times as necessary
        item_list = []
        for item in toc_dict['items']:
            result_n = filter_toc(item, namespaces, path_resolver)

            # only append the result if we know it exists
            if result_n:
                item_list.append(result_n)

        if item_list:
            toc_dict["items"] = item_list
        else:
            return None

    # handle href
    if "href" in toc_dict:
        toc_dict = path_resolver.amend_href(toc_dict)

    # leaf node
    if "children" in toc_dict:
        filtered_children = filter_children(toc_dict["children"], namespaces)

        # if we filter out all the children, this node should simply cease to exist
        if not filtered_children:
            return None
    elif "href" not in toc_dict and "items" not in toc_dict:
        return None

    return toc_dict


# collect every Namespace element after the first child of the root, plus the '**' catch-all.
# note: this reads the namespaces file via the global args rather than the passed-in element.
def grep_children_namespaces(autogenerated_toc_xml):
    return [ns.attrib['Name'] for ns in ET.parse(args.namespaces).getroot()[1:] if ns.tag == 'Namespace'] + ['**']


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
        Combines a reference and target ToC. The new target ToC mirrors the reference,
        omitting ToC entries that are NOT present in the preview output.
        """
    )
    parser.add_argument("-r", "--reference", help="The source ToC.yml", required=True)
    parser.add_argument("-t", "--target", help="The target ToC.yml", required=True)
    parser.add_argument(
        "-n",
        "--namespaces",
        help="The ToC.yml where target autogenerated documentation exists",
        required=True,
    )
    parser.add_argument(
        "-d",
        "--docrepo",
        help="The root directory of the target documentation repository.",
        required=True,
    )
    parser.add_argument(
        "-m",
        "--moniker",
        help="Selected moniker. Used when filling in moniker-folder path updates.",
        default="",
        required=False,
    )
    args = parser.parse_args()

    try:
        target_autogenerated_toc = ET.parse(args.namespaces).getroot()[0]
    except Exception as f:
        print(
            "Execution requires the known namespaces yml be defined. Please check if the target xml has assembly tags."
        )

    try:
        with open(args.reference, "r") as reference_yml:
            base_reference_toc = yaml.safe_load(reference_yml)
    except Exception as f:
        print(
            "Execution requires the known reference yml be defined."
        )

    present_in_target = grep_children_namespaces(target_autogenerated_toc)

    print(
        "Here are the visible namespaces in target autogenerated ToC. Constraining reference.yml."
    )
    for ns in sorted(present_in_target):
        print(" |__ " + ns)

    path_resolver = PathResolver(doc_repo_location=args.docrepo, moniker=args.moniker)

    base_reference_toc[0] = filter_toc(base_reference_toc[0], present_in_target, path_resolver)

    updated_content = yaml.dump(base_reference_toc, default_flow_style=False)

    with open(args.target, "w") as f:
        f.write(updated_content)
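
For illustration, a minimal sketch of how filter_toc prunes a reference entry. The sample dict, namespace list, and variable names below are invented for this example and do not appear in the script; PathResolver is constructed without a doc repo location, so amend_href leaves hrefs untouched.

# illustration only, not part of update-toc.py; assumes the definitions above are in scope
sample_entry = {
    "name": "Messaging",
    "items": [
        {"name": "Event Hubs", "children": ["azure-eventhub"]},
        {"name": "Service Bus", "children": ["azure-servicebus"]},
    ],
}

# no doc_repo_location, so amend_href returns entries unchanged
resolver = PathResolver()

# only entries whose children appear in the namespace list survive the filter
pruned = filter_toc(sample_entry, ["azure-eventhub", "**"], resolver)

# pruned == {"name": "Messaging", "items": [{"name": "Event Hubs", "children": ["azure-eventhub"]}]}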