packages/python-packages/doc-warden/warden/index_packages.py (279 lines of code) (raw):

# Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. from __future__ import print_function from .warden_common import check_match, walk_directory_for_pattern, get_omitted_files, get_java_package_roots, get_net_package, get_swift_package_roots, get_python_package_roots, get_js_package_roots, find_alongside_file, find_below_file, parse_pom, parse_csproj, is_java_pom_package_pom, find_above_file from .PackageInfo import PackageInfo import json import os import ast from jinja2 import Template import xml.etree.ElementTree as ET import textwrap import re import fnmatch import pathlib2 # python 3 transitioned StringIO to be part of `io` module. # python 2 needs the old version however try: from StringIO import StringIO except ImportError: from io import StringIO # chose to go with in-line templates. reasoning here is that the whitespace is a bit more important for markdown # given that all the content fitting on a single line is important, leveraging the format string method makes for a # much simpler to maintain template PKGID_COL = ' [`{{ pkg.package_id }}`]( {{ pkg.relative_package_location }} )' RM_COL = ' {% if len(pkg.relative_readme_location) > 0 %}[README]({{ pkg.relative_readme_location }}){% else %} N/A {% endif %} ' CL_COL = ' {% if len(pkg.relative_changelog_location) > 0 %}[CHANGELOG]({{ pkg.relative_changelog_location }}){% else %} N/A {% endif %} ' GROUPID_COL = ' `{{ pkg.repository_args[0] }}` ' REPO_COL = ' `[Repo Link]( {{ pkg.relative_package_location }}` )' PUBLISH_COL = ' {% if pkg.test_url(config) %}[{{ pkg.get_repository_link_text(config) }}]( {{ pkg.get_formatted_repository_url(config) }} ){% else %} N/A {% endif %} ' COLUMN_LOOP = '{% for pkg in packages %}' COLUMN_TEMPLATE = '{0}|{1}|{2}|{3}|{4}|'.format(COLUMN_LOOP, PKGID_COL, RM_COL, CL_COL, PUBLISH_COL) JAVA_COLUMN_TEMPLATE = '{0}|{1}|{2}|{3}|{4}|'.format(COLUMN_LOOP, PKGID_COL, GROUPID_COL, RM_COL, PUBLISH_COL) OUTPUT_HEADER = """ # Package Index - {{ title }} | Package Id | Readme | Changelog | Published Url | |----------------|-----------|---------------------------|---------------------| """ JAVA_OUTPUT_HEADER = """ # Package Index - {{ title }} | Artifact Id | Group Id | Readme | Published Url | |----------------|-----------|-----------|---------------------| """ COLUMN_OUTPUT_FOOTER = """ {% endfor %} """ # model for "context" of the output template # title = <repo-name> (target directory dirname() perhaps # packages = [ PackageInfo, PackageInfo, ... ] # len = wrapper for len(string) OUTPUT_TEMPLATE = OUTPUT_HEADER + COLUMN_TEMPLATE + COLUMN_OUTPUT_FOOTER JAVA_OUTPUT_TEMPLATE = JAVA_OUTPUT_HEADER + JAVA_COLUMN_TEMPLATE + COLUMN_OUTPUT_FOOTER # given a set of package roots (omitted packages are included for indexing), extract a set of common metadata # to use when generated a package index. def index_packages(configuration): language_selector = { 'python': get_python_package_info, 'js': get_js_package_info, 'java': get_java_package_info, 'net': get_net_package_info, 'swift': get_swift_package_info } return language_selector.get(configuration.scan_language.lower(), unrecognized_option)(configuration) def get_swift_package_id_from_directory(directory): package_id = pathlib2.Path(directory).name return package_id def get_swift_package_info(config): pkg_list = [] pkg_locations, ignored_pkg_locations = get_swift_package_roots(config) for pkg_file in (pkg_locations + ignored_pkg_locations): pkg_id = get_swift_package_id_from_directory(pkg_file) changelog = os.path.join(pkg_file, 'CHANGELOG.md') changelog_relpath = webify_relative_path(os.path.relpath(changelog, config.target_directory)) readme = os.path.join(pkg_file, 'README.md') readme_relpath = webify_relative_path(os.path.relpath(readme, config.target_directory)) pkg_location = webify_relative_path(os.path.relpath(pkg_file, config.target_directory)) # The way I am determining the package version is by lifting the marketing version # from the project.pbxproj file. There isn't really a strong notion of semantic version # in XCode projects beyond this marketing version from what I can tell. Perhaps if # SwiftPM becomes more dominant and gets tooling support then that might change. pbxproj_file_path = '{}/{}.xcodeproj/project.pbxproj'.format(pkg_file, pkg_id) with open(pbxproj_file_path) as pbxproj_file: pbxproj_file_contents = pbxproj_file.read() # This is a pretty simple expression, it grabs the strings that are # in the form: # # MARKETING_VERSION = "1.0.0-beta.1" # # It then uses a capture group name to zero in on the version. It # doesn't attempt to validate the format of the version, it just takes # the value between the quotes. version_match_expression = 'MARKETING_VERSION = \"(?P<version>(.*))\"' search_result = re.search(version_match_expression, pbxproj_file_contents) if search_result is None: continue else: version = search_result.group(2) if(pkg_id not in config.package_indexing_exclusion_list): pkg_list.append(PackageInfo( package_id = pkg_id, package_version = version, relative_package_location = pkg_location, relative_readme_location = readme_relpath or '', relative_changelog_location = changelog_relpath or '', repository_args = [] )) # leverages python AST to parse arguments to `setup.py` and return a list of all indexed packages # within the target directory def get_python_package_info(config): pkg_list = [] pkg_locations, ignored_pkg_locations = get_python_package_roots(config) for pkg_file in (pkg_locations + ignored_pkg_locations): pkg_id, version = parse_setup(config, pkg_file) # package is badly formatted setup. ignore. if pkg_id is None: continue changelog = find_below_file('history.md', pkg_file) if changelog is None: changelog = find_below_file('history.rst', pkg_file) readme = find_below_file('readme.md', pkg_file) if readme is None: readme = find_below_file('readme.rst', pkg_file) if changelog: changelog_relpath = webify_relative_path(os.path.relpath(changelog, config.target_directory)) else: changelog_relpath = '' if readme: readme_relpath = webify_relative_path(os.path.relpath(readme, config.target_directory)) else: readme_relpath = '' pkg_location = webify_relative_path(os.path.relpath(pkg_file, config.target_directory)) if(pkg_id not in config.package_indexing_exclusion_list): pkg_list.append(PackageInfo( package_id = pkg_id, package_version = version, relative_package_location = pkg_location, relative_readme_location = readme_relpath or '', relative_changelog_location = changelog_relpath or '', repository_args = [] )) return pkg_list # leverages JSON parsing of any packages.json files and returns a list of all indexed packages found # within the target directory def get_js_package_info(config): pkg_list = [] pkg_locations, ignored_pkg_locations = get_js_package_roots(config) for pkg_file in (pkg_locations + ignored_pkg_locations): with open(pkg_file, 'r') as read_file: pkg_json = json.load(read_file) target_directory = os.path.dirname(pkg_file) changelog = find_below_file('changelog.md', pkg_file) readme = find_below_file('readme.md', pkg_file) if changelog: changelog_relpath = webify_relative_path(os.path.relpath(changelog, config.target_directory)) else: changelog_relpath = '' if readme: readme_relpath = webify_relative_path(os.path.relpath(readme, config.target_directory)) else: readme_relpath = '' pkg_location = webify_relative_path(os.path.relpath(pkg_file, config.target_directory)) if(pkg_json['name'] not in config.package_indexing_exclusion_list): pkg_list.append(PackageInfo( package_id = pkg_json['name'], package_version = pkg_json['version'], relative_package_location = pkg_location, relative_readme_location = readme_relpath or '', relative_changelog_location = changelog_relpath or '', repository_args = [] )) return pkg_list # given a pom file, maven `groupId` is usually present at the same level as the `artifactId` # however, this is not always the case. When this instance occurs, we instead look for the parent `groupId` def resolve_java_group_id(pom_root): parent_id_root = pom_root.find('parent') group_id_root = pom_root.find('groupId') # there isn't a groupId at the same level as version and artifactId if group_id_root is None: if parent_id_root: parent_group_id = parent_id_root.find('groupId') if parent_group_id: return parent_group_id.text else: return group_id_root.text return '' # parses all pom files within the target directory, and returns a list of `jar` packages found within def get_java_package_info(config): pkg_list = [] pkg_locations, ignored_pkg_locations = get_java_package_roots(config) for pkg_file in (pkg_locations + ignored_pkg_locations): with open(pkg_file, 'r') as read_file: root = parse_pom(pkg_file) target_directory = os.path.dirname(pkg_file) changelog = find_below_file('changelog.md', pkg_file) readme = find_below_file('readme.md', pkg_file) if changelog: changelog_relpath = webify_relative_path(os.path.relpath(changelog, config.target_directory)) else: changelog_relpath = '' if readme: readme_relpath = webify_relative_path(os.path.relpath(readme, config.target_directory)) else: readme_relpath = '' pkg_location = webify_relative_path(os.path.relpath(pkg_file, config.target_directory)) artifact_root = root.find('artifactId') version_root = root.find('version') group_id = resolve_java_group_id(root) if artifact_root is None or version_root is None or not group_id: if config.verbose_output: print("{} has is missing a version, artifactId, or groupId".format(pkg_file)) continue if(artifact_root.text not in config.package_indexing_exclusion_list): pkg_list.append(PackageInfo( package_id = artifact_root.text, package_version = version_root.text, relative_package_location = pkg_location, relative_readme_location = readme_relpath or '', relative_changelog_location = changelog_relpath or '', repository_args = [group_id] )) return pkg_list # finds .net packages (non-test CSProjs) and attempts to correlate the packageinfo details # returns a list of all `packages` found within the target directory. def get_net_package_info(config): pkg_list = [] pkg_locations, ignored_pkg_locations = get_net_package(config) for pkg_file in (pkg_locations + ignored_pkg_locations): pkg_version = parse_csproj(pkg_file) pkg_name = os.path.splitext(os.path.basename(pkg_file))[0] if(pkg_name not in config.package_indexing_exclusion_list): changelog = find_above_file('changelog.md', pkg_file, config.get_package_indexing_traversal_stops(), net_early_exit, os.path.normpath(config.target_directory)) readme = find_above_file('readme.md', pkg_file, config.get_package_indexing_traversal_stops(), net_early_exit, os.path.normpath(config.target_directory)) if changelog: changelog_relpath = webify_relative_path(os.path.relpath(changelog, config.target_directory)) else: changelog_relpath = '' if readme: readme_relpath = webify_relative_path(os.path.relpath(readme, config.target_directory)) else: readme_relpath = '' pkg_location = webify_relative_path(os.path.relpath(pkg_file, config.target_directory)) pkg_list.append(PackageInfo( package_id = pkg_name, package_version = pkg_version, relative_package_location = pkg_location, relative_readme_location = readme_relpath or '', relative_changelog_location = changelog_relpath or '', repository_args = [] )) return pkg_list # used after scanning a directory for readme. If this returns true, # we shouldn't traverse higher up the tree. def net_early_exit(path): if path is None: return False rule = re.compile(fnmatch.translate('*.sln')) for file in os.listdir(path): if rule.match(file): return True return False # windows outputs paths with `\`, but that really needs to be `/` to work as a url # given that this is a cross-platform package, we will manually handle this here. def webify_relative_path(path): path_corrected = path.replace('\\', '/') return path_corrected # entrypoint for rendering the packages.md # handles the template selection and execution def render(config, pkg_list): language_selector = { 'python': OUTPUT_TEMPLATE, 'js': OUTPUT_TEMPLATE, 'java': JAVA_OUTPUT_TEMPLATE, 'net': OUTPUT_TEMPLATE } template = language_selector.get(config.scan_language.lower(), unrecognized_option) render_template(config, pkg_list, template) # implementation of the jinja2 template substitution. given a packagelist, generates # packages.md rows. def render_template(config, pkg_list, template): template = Template(template) pkg_list.sort(key=lambda x: x.package_id) get_len = lambda string: len(string) rendered_template = template.render(title=os.path.basename(config.target_directory), packages=pkg_list, config=config, len=get_len) with open(config.package_index_output_location, 'w') as packages_file: packages_file.write(rendered_template) def unrecognized_option(configuration): print('Argument {} provided is not a supported language.'.format(configuration.scan_language)) exit(1) # opens setup.py and leverages AST to intercept the parameters TO setup.py # this easily allows us to examine the values that may originate from outside this file (like VERSION) def parse_setup(config, setup_filename): mock_setup = textwrap.dedent('''\ def setup(*args, **kwargs): __setup_calls__.append((args, kwargs)) ''') parsed_mock_setup = ast.parse(mock_setup, filename=setup_filename) with open(setup_filename, 'rt') as setup_file: try: parsed = ast.parse(setup_file.read()) except: if config.verbose_output: print('{} was unparsable.'.format(setup_filename)) return None, None for index, node in enumerate(parsed.body[:]): if ( not isinstance(node, ast.Expr) or not isinstance(node.value, ast.Call) or not hasattr(node.value.func, 'id') or node.value.func.id != 'setup' ): continue parsed.body[index:index] = parsed_mock_setup.body break fixed = ast.fix_missing_locations(parsed) codeobj = compile(fixed, setup_filename, 'exec') local_vars = {} global_vars = {'__setup_calls__': []} current_dir = os.getcwd() working_dir = os.path.dirname(setup_filename) os.chdir(working_dir) try: exec(codeobj, global_vars, local_vars) except: if config.verbose_output: print('{} ran into an exception during exec'.format(setup_filename)) return None, None os.chdir(current_dir) try: _, kwargs = global_vars['__setup_calls__'][0] except: if config.verbose_output: print('{} had no kwargs'.format(setup_filename)) return None, None version = kwargs['version'] pkg_id = kwargs['name'] return pkg_id, version