scripts/auto_link_to_docs.py (63 lines of code) (raw):

#!/usr/bin/env python

# Copyright 2013 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Processes docs-links.json and updates every README, replacing the section
delimited by

<!-- auto-doc-link --><!-- end-auto-doc-link -->

with a generated list of documentation backlinks.
"""

from collections import defaultdict
import json
import os
import re


REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
DOC_SITE_ROOT = "https://cloud.google.com"
# Non-greedy + DOTALL so that one marker pair (and everything between the
# markers, across lines) is matched at a time. The trailing newline is part
# of the match, so generated statements must end with one to stay replaceable.
AUTO_DOC_LINK_EXP = re.compile(
    r"<!-- auto-doc-link -->.*?<!-- end-auto-doc-link -->\n", re.DOTALL
)


def invert_docs_link_map(docs_links):
    """
    The docs links map is in this format:

        {
            "doc_path": [
                "file_path",
            ]
        }

    This transforms it to:

        {
            "file_path": [
                "doc_path",
            ]
        }

    Each list of doc paths is de-duplicated and sorted, so the result does
    not depend on dict/set iteration order.
    """
    # Collect into sets first: de-duplicates once instead of rebuilding a
    # list(set(...)) on every inner-loop iteration.
    docs_by_file = defaultdict(set)
    # .items() rather than .iteritems(): the latter does not exist on
    # Python 3, while .items() works on both Python 2 and 3.
    for doc, paths in docs_links.items():
        for path in paths:
            docs_by_file[path].add(doc)

    files_to_docs = defaultdict(list)
    for path, docs in docs_by_file.items():
        files_to_docs[path] = sorted(docs)

    return files_to_docs


def collect_docs_for_readmes(files_to_docs):
    """
    There's a one-to-many relationship between readmes and files. This method
    finds the readme for each file and consolidates all docs references.

    Returns a mapping of README path to a de-duplicated, sorted list of doc
    paths.
    """
    docs_by_readme = defaultdict(set)
    for path, docs in files_to_docs.items():
        docs_by_readme[get_readme_path(path)].update(docs)

    readmes_to_docs = defaultdict(list)
    for readme, docs in docs_by_readme.items():
        readmes_to_docs[readme] = sorted(docs)

    return readmes_to_docs


def linkify(docs):
    """Adds the documentation site root to doc paths, creating a full URL."""
    return [DOC_SITE_ROOT + x for x in docs]


def replace_contents(file_path, regex, new_content):
    """Rewrites file_path in place, substituting every match of regex.

    new_content is handed to regex.sub() as the replacement, so it is
    treated as a replacement template (backslash escapes are processed).
    """
    with open(file_path, "r+") as f:
        content = f.read()
        content = regex.sub(new_content, content)
        f.seek(0)
        f.write(content)
        # Without truncating, a replacement shorter than the original text
        # would leave stale bytes from the old content at the end of the file.
        f.truncate()


def get_readme_path(file_path):
    """Gets the readme for an associated sample file, basically just the
    README.md in the same directory."""
    sample_dir = os.path.dirname(file_path)
    return os.path.join(REPO_ROOT, sample_dir, "README.md")


def generate_doc_link_statement(docs):
    """Builds the marker-delimited backlink section for the given doc paths.

    A single doc produces a one-line statement; any other count produces a
    bulleted list. The result ends with a newline after the end marker so
    that AUTO_DOC_LINK_EXP matches it again on the next run.
    """
    links = linkify(docs)

    if len(links) == 1:
        return """<!-- auto-doc-link -->
These samples are used on the following documentation page:

> {}

<!-- end-auto-doc-link -->
""".format(
            links[0]
        )

    return """<!-- auto-doc-link -->
These samples are used on the following documentation pages:

>
{}

<!-- end-auto-doc-link -->
""".format(
        "\n".join(["* {}".format(x) for x in links])
    )


def update_readme(readme_path, docs):
    """Replaces the auto-doc-link section of readme_path, if the file exists.

    Prints a message and returns without doing anything when the README is
    missing.
    """
    if not os.path.exists(readme_path):
        print("{} doesn't exist".format(readme_path))
        return
    replace_contents(readme_path, AUTO_DOC_LINK_EXP, generate_doc_link_statement(docs))
    print("Updated {}".format(readme_path))


def main():
    """Loads scripts/resources/docs-links.json and updates each README."""
    links_path = os.path.join(REPO_ROOT, "scripts", "resources", "docs-links.json")
    # Context manager so the file handle is closed deterministically.
    with open(links_path, "r") as f:
        docs_links = json.load(f)

    files_to_docs = invert_docs_link_map(docs_links)
    readmes_to_docs = collect_docs_for_readmes(files_to_docs)

    for readme, docs in readmes_to_docs.items():
        update_readme(readme, docs)


if __name__ == "__main__":
    main()