tools/assets-automation/assets-reporting/generate_assets_report.py (410 lines of code) (raw):

import os, argparse, glob, json, datetime, re
from subprocess import run
from typing import List, Dict, Any

import yaml  # pyyaml
from packaging import version  # from packaging
from ci_tools.functions import (
    discover_targeted_packages,
)  # azure-sdk-tools from Azure/azure-sdk-for-python
from ci_tools.parsing import ParsedSetup

# All cloned language repos land under <this file's directory>/generated/<Language>.
generated_folder = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "generated"))

TABLE_HEADER: str = """| Package | Using Proxy | External Recordings |
|---|---|---|
"""

# NOTE: this constant was defined twice (identically) in the original file; deduplicated.
TABLE_LAYER: str = """|{}|{}|{}|
"""

YES = "✅"
NO = "❌"

# Two markdown tables side by side inside an HTML table, so long package lists
# render as two columns on the wiki page.
DOCUMENT: str = """
<table>
<tr>
<td>

{}

</td>
<td>

{}

</td>
</tr>
</table>
"""

SUMMARY_TABLE_HEADER: str = """| Language | Package Count | Using Proxy | External Recordings |
|---|---|---|---|
"""

SUMMARY_TABLE_LAYER: str = """|{}|{}|{:.0%}|{:.0%}|
"""

SUMMARY_NOTES = """
## A few notes about how this data was generated

- Markdown for these wiki pages is generated from a [single python script.](https://github.com/Azure/azure-sdk-tools/tree/main/tools/assets-automation/assets-reporting/generate_assets_report.py)
- Within the script follow `generate_<language>_report()` definition to understand how the data for that language was obtained.
- The `Package Count` for each language is NOT the actual total count of packages within each monorepo. It is the count of packages that are slated to transition _at some point_.
- Where applicable, counts only include `track 2` packages, upholding the previous point about "intended to transition eventually."
"""

TABLE_HEIGHT: int = 10
BATCH_SIZE = TABLE_HEIGHT * 2


class ScanResult:
    """Collected transition status for one language's monorepo.

    The evaluate_* helpers below share a return-code convention:
    0 = not transitioned / excluded, 1 = using the test-proxy,
    2 = recordings externalized via assets.json (implies proxy usage).
    Java additionally uses -1 = drop the package from the count entirely.
    """

    def __init__(self, language: str):
        self.language = language
        # every package slated to transition at some point
        self.packages: List[str] = []
        # subset of packages that run tests through the test-proxy
        self.packages_using_proxy: List[str] = []
        # subset whose recordings live in an external assets repo
        self.packages_using_external: List[str] = []


def get_repo(language: str) -> str:
    """Clone (or refresh) azure-sdk-for-<language> under generated/ and return its path."""
    where = f"https://github.com/azure/azure-sdk-for-{language.lower()}"
    target_folder = os.path.join(generated_folder, language)

    print(f"Cloning repo for {language} from {where}", end="...")

    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
        # shallow clone of main: only the current tree is needed, not history
        command = [
            "git",
            "clone",
            "--depth",
            "1",
            "--branch",
            "main",
            where,
            target_folder,
        ]
        run(command, cwd=generated_folder)
    else:
        # repo already present from a previous run -- just fast-forward it.
        # target_folder is already absolute, so the original
        # os.path.join(generated_folder, target_folder) was a no-op; use it directly.
        command = ["git", "pull", "origin", "main"]
        run(command, cwd=target_folder)

    print(YES)
    return target_folder


def evaluate_python_package(package_path: str) -> int:
    """Classify a python package: 0 excluded/not transitioned, 1 test-proxy, 2 externalized."""
    recordings_folder = os.path.join(package_path, "tests", "recordings")
    recordings_glob = os.path.join(recordings_folder, "*.json")
    assets_json = os.path.join(package_path, "assets.json")

    details = ParsedSetup.from_path(package_path)

    # track 2 packages depend on azure-core or azure-mgmt-core; anything else never migrates
    if not (
        any(["azure-core" in req for req in details.requires])
        or any(["azure-mgmt-core" in req for req in details.requires])
    ):
        return 0

    # only examine packages that currently have recordings (and ensure that ones
    # transitioned to external aren't ignored)
    if not os.path.exists(recordings_folder) and not os.path.exists(assets_json):
        return 0

    # if there is an assets.json present at root, we are done. it's transitioned.
    if os.path.exists(assets_json):
        return 2

    # otherwise, check recording extensions: json recordings mean test-proxy,
    # yml recordings mean legacy vcrpy
    test_proxy_files = glob.glob(recordings_glob)
    if test_proxy_files:
        return 1

    return 0


def generate_python_report() -> ScanResult:
    """Scan the python monorepo and report test-proxy / external-assets adoption."""
    language = "Python"
    repo = get_repo(language)
    print(f"Evaluating repo for {language} @ {repo}", end="...")
    result = ScanResult(language)

    # namespace packages ("-nspkg") contain no tests and will never transition
    results = [pkg for pkg in discover_targeted_packages("azure*", repo) if "-nspkg" not in pkg]

    to_be_removed = []
    for pkg in results:
        evaluation = evaluate_python_package(pkg)
        if evaluation == 0:
            to_be_removed.append(pkg)
        elif evaluation == 1:
            result.packages_using_proxy.append(os.path.basename(pkg))
        elif evaluation == 2:
            # externalized packages necessarily run through the proxy as well
            result.packages_using_proxy.append(os.path.basename(pkg))
            result.packages_using_external.append(os.path.basename(pkg))

    result.packages = sorted(
        set([os.path.basename(pkg) for pkg in results]) - set([os.path.basename(pkg) for pkg in to_be_removed])
    )

    print("done.")
    return result


def evaluate_go_package(package_path: str) -> int:
    """Classify a go module by presence of testdata/recordings and/or assets.json."""
    evaluation = 0
    possible_recordings_dir = os.path.join(package_path, "testdata", "recordings")
    possible_assets = os.path.join(package_path, "assets.json")

    # only examine packages that currently have recordings (and ensure that ones
    # transitioned to external aren't ignored)
    if not os.path.exists(possible_recordings_dir) and not os.path.exists(possible_assets):
        return 0

    # a testdata/recordings directory indicates test-proxy usage...
    if os.path.exists(possible_recordings_dir):
        evaluation = 1

    # ...and an assets.json wins over it: recordings are externalized
    if os.path.exists(possible_assets):
        evaluation = 2

    return evaluation


def generate_go_report() -> ScanResult:
    """Scan the go monorepo (every go.mod under sdk/) and report adoption."""
    language = "Go"
    repo_root = get_repo(language)
    print(f"Evaluating repo for {language} @ {repo_root}", end="...")
    result = ScanResult(language)
    sdk_path = os.path.join(repo_root, "sdk")

    exclusions = [os.path.join("testdata", "perf", "go.mod"), "template", "samples", "internal", "azcore"]
    packages = glob.glob(os.path.join(repo_root, "sdk", "**", "go.mod"), recursive=True)
    packages = [os.path.dirname(pkg) for pkg in packages if not any([x in pkg for x in exclusions])]
    # report packages by their path relative to sdk/
    result.packages = sorted(set([pkg.replace(sdk_path + os.sep, "") for pkg in packages]))

    for pkg in packages:
        evaluation = evaluate_go_package(pkg)
        relative = pkg.replace(sdk_path + os.sep, "")
        if evaluation == 0:
            # not transitioning -- drop from the pre-populated list. Guarded: two
            # absolute paths can map to the same relative name after the set() above.
            if relative in result.packages:
                result.packages.remove(relative)
        elif evaluation == 1:
            result.packages_using_proxy.append(relative)
        elif evaluation == 2:
            result.packages_using_proxy.append(relative)
            result.packages_using_external.append(relative)

    print("done.")
    return result


def evaluate_net_package(csproj_path: str) -> int:
    """Classify a .NET package: 0 excluded, 1 recorded-test base class found, 2 assets.json."""
    evaluation = 0
    possible_test_directory = os.path.join(os.path.dirname(csproj_path), "..", "tests")
    possible_project_assets_json = os.path.join(os.path.dirname(csproj_path), "..", "assets.json")
    possible_solution_assets_json = os.path.join(os.path.dirname(csproj_path), "..", "..", "assets.json")
    session_records = os.path.join(possible_test_directory, "SessionRecords")
    package_name = os.path.splitext(os.path.basename(csproj_path))[0]

    if not os.path.exists(possible_test_directory):
        return 0

    # for Azure.*, only examine packages with recorded tests. EG with existing
    # SessionRecords or an existing assets.json
    if not os.path.exists(session_records) and not (
        os.path.exists(possible_project_assets_json) or os.path.exists(possible_solution_assets_json)
    ):
        return 0

    # For mgmt, you should find a reference to ManagementRecordedTestBase in projects using test proxy:
    # https://grep.app/search?q=managementrecordedtestbase&filter[repo][0]=Azure/azure-sdk-for-net&filter[path][0]=sdk/
    # For data plane, you should find RecordedTestBase:
    # https://grep.app/search?q=recordedtestbase&filter[repo][0]=Azure/azure-sdk-for-net&filter[path][0]=sdk/
    find = "RecordedTestBase"
    if "ResourceManager" in package_name:
        find = "ManagementRecordedTestBase"

    test_files = glob.glob(os.path.join(possible_test_directory, "**", "*.cs"), recursive=True)
    for testfile in test_files:
        try:
            with open(testfile, "r", encoding="utf-8") as f:
                if find in f.read():
                    evaluation = 1
        except (OSError, UnicodeDecodeError):
            # unreadable or non-utf8 sources are skipped (was a bare except in the original)
            pass

    # an assets.json at project or solution level wins: recordings are externalized
    if os.path.exists(possible_project_assets_json) or os.path.exists(possible_solution_assets_json):
        evaluation = 2

    return evaluation


def net_trim_path(solution_path: str) -> str:
    """Reduce a csproj path to the bare package name (file name without extension)."""
    return os.path.splitext(os.path.basename(solution_path))[0]


def generate_net_report() -> ScanResult:
    """Scan the .NET monorepo for Azure.* csproj files and report adoption."""
    language = "net"
    result = ScanResult("." + language.upper())
    repo = get_repo(language)
    print(f"Evaluating repo for {language} @ {repo}", end="...")

    # layout: sdk/<service>/<package>/src/<Azure.*.csproj>
    all_azure_projects = glob.glob(os.path.join(repo, "sdk", "*", "*", "src", "*Azure.*.csproj"), recursive=True)

    to_be_removed = []
    for csproj in all_azure_projects:
        evaluation = evaluate_net_package(csproj)
        if evaluation == 0:
            to_be_removed.append(csproj)
        elif evaluation == 1:
            result.packages_using_proxy.append(net_trim_path(csproj))
        elif evaluation == 2:
            result.packages_using_proxy.append(net_trim_path(csproj))
            result.packages_using_external.append(net_trim_path(csproj))

    result.packages = sorted(
        set([net_trim_path(csproj) for csproj in all_azure_projects])
        - set([net_trim_path(csproj) for csproj in to_be_removed])
    )

    print("done.")
    return result


def evaluate_cpp_package(package_path: str) -> int:
    """Classify a cpp package: 2 when an assets.json sits beside it, otherwise 0.

    NOTE(review): the original body contained a dead `if False: evaluation = 1`
    placeholder -- there is currently no detection for proxy-without-assets in cpp,
    so this never returns 1.
    """
    possible_assets_json = os.path.join(package_path, "..", "assets.json")

    if os.path.exists(possible_assets_json):
        return 2

    return 0


def generate_cpp_report() -> ScanResult:
    """Scan the cpp monorepo (vcpkg.json manifests under sdk/) and report adoption."""
    language = "CPP"
    result = ScanResult(language)
    repo_root = get_repo(language)
    print(f"Evaluating repo for {language} @ {repo_root}", end="...")

    exclusions = [os.path.join("vcpkg", "vcpkg.json"), "template", os.path.join("sdk", "core")]
    packages = glob.glob(os.path.join(repo_root, "sdk", "**", "vcpkg.json"), recursive=True)
    packages = [os.path.dirname(pkg) for pkg in packages if not any([x in pkg for x in exclusions])]
    # unlike other languages, packages evaluating to 0 are intentionally kept in the
    # total count here -- presumably all remaining cpp packages are slated to transition
    result.packages = sorted([os.path.basename(pkg) for pkg in packages])

    for pkg in packages:
        evaluation = evaluate_cpp_package(pkg)
        if evaluation == 1:
            result.packages_using_proxy.append(os.path.basename(pkg))
        elif evaluation == 2:
            result.packages_using_proxy.append(os.path.basename(pkg))
            result.packages_using_external.append(os.path.basename(pkg))

    print("done.")
    return result


def resolve_java_test_directory(package_path: str) -> str:
    """Return the package's src/test or src/tests directory, or "" when neither exists."""
    singular = os.path.join(os.path.dirname(package_path), "src", "test")
    plural = os.path.join(os.path.dirname(package_path), "src", "tests")

    if os.path.exists(singular):
        return singular
    elif os.path.exists(plural):
        return plural
    else:
        return ""


def evaluate_java_package(package_path: str) -> int:
    """Classify a java package: -1 drop from count, 0 not transitioned, 1 proxy, 2 externalized."""
    possible_test_directory = resolve_java_test_directory(package_path)
    possible_assets_location = os.path.join(os.path.dirname(package_path), "assets.json")

    # an assets.json beside the pom means the package is fully transitioned
    if os.path.exists(possible_assets_location):
        return 2

    # no test directory at all -- exclude from the report entirely
    if not possible_test_directory:
        return -1

    test_files = glob.glob(os.path.join(possible_test_directory, "**", "*.java"), recursive=True)

    # we only will search the test_files if there are actual session-records present
    session_glob = os.path.join(possible_test_directory, "**", "session-records")
    session_records = glob.glob(session_glob, recursive=True)
    if not session_records:
        return -1

    for testfile in test_files:
        try:
            with open(testfile, "r", encoding="utf-8") as f:
                if "extends TestProxyTestBase" in f.read():
                    return 1
        except (OSError, UnicodeDecodeError):
            # unreadable or non-utf8 sources are skipped (was a bare except in the original)
            pass

    return 0


def generate_java_report() -> ScanResult:
    """Scan the java monorepo (sdk/<service>/<package>/pom.xml) and report adoption."""
    language = "Java"
    result = ScanResult(language)
    repo_root = get_repo(language)
    print(f"Evaluating repo for {language} @ {repo_root}", end="...")

    # enforce looking under individual package dir, and not service dir
    packages = glob.glob(os.path.join(repo_root, "sdk", "*", "*", "pom.xml"), recursive=True)

    # we don't care about packages that start with 'microsoft-' as they are track 1 and will never migrate
    packages = [package for package in packages if not "microsoft-" in os.path.dirname(package)]
    # hand-maintained exclusions: deprecated or never-transitioning packages
    packages = [package for package in packages if not "azure-communication-callingserver" in os.path.dirname(package)]
    packages = [package for package in packages if not "azure-maps-elevation" in os.path.dirname(package)]
    packages = [package for package in packages if not "azure-verticals-agrifood-farming" in os.path.dirname(package)]
    result.packages = sorted([os.path.basename(os.path.dirname(pkg)) for pkg in packages])

    for pkg in packages:
        evaluation = evaluate_java_package(pkg)
        name = os.path.basename(os.path.dirname(pkg))
        if evaluation == -1:
            result.packages.remove(name)
        elif evaluation == 1:
            result.packages_using_proxy.append(name)
        elif evaluation == 2:
            result.packages_using_proxy.append(name)
            result.packages_using_external.append(name)

    result.packages = sorted(set(result.packages))

    print("done.")
    return result


def evaluate_js_package(package_path: str) -> int:
    """Classify a js package via its package.json: 2 assets.json alongside, 1 proxy recorder."""
    with open(package_path, "r", encoding="utf-8") as f:
        package_json = json.load(f)

    assets_json = os.path.join(os.path.dirname(package_path), "assets.json")
    if os.path.exists(assets_json):
        return 2

    if "devDependencies" in package_json:
        if "@azure-tools/test-recorder" in package_json["devDependencies"]:
            version_spec = package_json["devDependencies"]["@azure-tools/test-recorder"]
            # strip a leading caret so the spec parses as a plain version
            if version_spec[0] == "^":
                version_spec = version_spec[1:]
            # test-recorder 2.x+ is the test-proxy rewrite; 1.x is the legacy recorder
            if version.parse(version_spec) >= version.parse("2.0.0"):
                return 1

    return 0


def e_startswith(input: str, prefixes: List[str]) -> bool:
    """True when input starts with any of the given prefixes."""
    return any([input.startswith(fix) for fix in prefixes])


def e_endswith(input: str, postfixes: List[str]) -> bool:
    """True when input ends with any of the given postfixes."""
    return any([input.endswith(fix) for fix in postfixes])


def e_directory_in(input_dir: str, directory_patterns: List[str]) -> bool:
    """True when any of the directory patterns appears inside input_dir."""
    return any([subdir in input_dir for subdir in directory_patterns])


def js_package_included(package_path: str) -> bool:
    """Decide whether a package.json belongs in the js report at all."""
    package_name = os.path.basename(os.path.dirname(package_path))

    # ("samples-react" appeared twice in the original list; deduplicated)
    excluded_packages = [
        "samples-react",
        "sample-react",
        "mock-hub",
        "abort-controller",
        "logger",
        "samples-express",
        "samples-browser",
        "event-hubs-track-1",
        "opentelemetry-instrumentation-azure-sdk",
        "monitor-opentelemetry-exporter",
        "service-bus-v1",
        "service-bus-v7",
        "app",
        "perf",
        "service-bus",
        "eventhubs-checkpointstore-blob",
        "eventhubs-checkpointstore-tables",
        "schema-registry-avro",
        "api-management-custom-widgets-scaffolder",
        "storage-internal-avro",
        "web-pubsub-express",
    ]
    excluded_package_postfixes = ["-track-1", "-common"]
    excluded_package_prefixes = ["@azure/core-", "core-"]

    # exclude any packages that have these paths in them
    excluded_directories = [
        os.path.join("sdk", "identity", "identity", "test"),
        os.path.join("sdk", "test-utils"),
        os.path.join("sdk", "core"),
        "samples",
    ]

    # only include packages with a test folder alongside
    has_test_folder = os.path.exists(os.path.join(os.path.dirname(package_path), "test"))

    # ensure we don't include amqp packages (they can't convert to test-proxy)
    amqp_package = False
    with open(package_path, "r", encoding="utf-8") as f:
        package_json = json.load(f)
        if "dependencies" in package_json:
            if "@azure/core-amqp" in package_json["dependencies"]:
                amqp_package = True

    return (
        "samples" not in os.path.normpath(package_path).split(os.sep)
        and package_name not in excluded_packages
        and not e_startswith(package_name, excluded_package_prefixes)
        and not e_endswith(package_name, excluded_package_postfixes)
        and not e_directory_in(package_path, excluded_directories)
        and not amqp_package
        and has_test_folder
    )


def generate_js_report() -> ScanResult:
    """Scan the js monorepo (every package.json under sdk/) and report adoption."""
    language = "JS"
    repo = get_repo(language)
    print(f"Evaluating repo for {language} @ {repo}", end="...")
    target_folder = os.path.join(repo, "sdk", "**", "package.json")

    result = ScanResult(language)
    results = glob.glob(target_folder, recursive=True)

    result.packages = sorted(
        set([os.path.basename(os.path.dirname(pkg)) for pkg in results if js_package_included(pkg)])
    )

    # NOTE(review): this loop classifies ALL discovered packages, including ones
    # js_package_included() filtered out of result.packages -- so the proxy/external
    # lists can contain excluded packages and skew the summary percentages. Preserved
    # as-is; confirm intent before changing the reported numbers.
    for pkg in results:
        evaluation = evaluate_js_package(pkg)
        if evaluation == 1:
            result.packages_using_proxy.append(os.path.basename(os.path.dirname(pkg)))
        elif evaluation == 2:
            result.packages_using_proxy.append(os.path.basename(os.path.dirname(pkg)))
            result.packages_using_external.append(os.path.basename(os.path.dirname(pkg)))

    print("done.")
    return result


def generate_detailed_table(origin: ScanResult, package_set: List[str]) -> str:
    """Render one markdown detail table for the given subset of origin.packages."""
    result = TABLE_HEADER

    for package in package_set:
        transitioned = YES if package in origin.packages_using_proxy else NO
        externalized = YES if package in origin.packages_using_external else NO
        # go/.NET entries may carry windows path separators; normalize for markdown
        table_row = TABLE_LAYER.format(package.replace("\\", "/"), transitioned, externalized)
        result += table_row

    return result


def generate_summary_table(results: List[ScanResult]) -> str:
    """Render the cross-language summary: Language | Package Count | Using Proxy | External Recordings."""
    result = SUMMARY_TABLE_HEADER

    for language in results:
        # guard against a scan that found zero packages (avoids ZeroDivisionError)
        total = float(len(language.packages)) or 1.0
        result += SUMMARY_TABLE_LAYER.format(
            language.language,
            len(language.packages),
            len(language.packages_using_proxy) / total,
            len(language.packages_using_external) / total,
        )

    return result


def write_output(result: ScanResult) -> None:
    """Write <language>.md containing the two-column detail tables for one language."""
    with open(result.language.lower() + ".md", "w", encoding="utf-8") as f:
        date = datetime.date.today()
        # leaving this commented, as the level of detail doesn't assist the report
        # time_of_day = datetime.datetime.today().strftime("%I:%M%p")
        # @{time_of_day} {datetime.datetime.today().astimezone().tzname()}

        # f.write, not f.writelines: the original passed a plain str to writelines,
        # which iterates it character by character (same output, wrong API)
        f.write(f"# {result.language} Transition Details as of {date}")

        if result.packages:
            # split the package list into two columns of (nearly) equal height
            batch_size = (len(result.packages) // 2) + (len(result.packages) % 2)
            table_set_1 = result.packages[0:batch_size]
            table_set_2 = result.packages[batch_size:]

            document_addition = DOCUMENT.format(
                generate_detailed_table(result, table_set_1), generate_detailed_table(result, table_set_2)
            )
            f.write(document_addition)


def write_summary(results: List[ScanResult]) -> None:
    """Write summary.md with the cross-language progress table and footnotes."""
    with open("summary.md", "w", encoding="utf-8") as f:
        date = datetime.date.today()
        # leaving this commented, as the level of detail doesn't assist the report
        # time_of_day = datetime.datetime.today().strftime("%I:%M%p")
        # @{time_of_day} {datetime.datetime.today().astimezone().tzname()}
        f.write(f"# Test-Proxy overall progress per language - {date}" + os.linesep)
        f.write(generate_summary_table(results))
        f.write(SUMMARY_NOTES)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
Generates a markdown report that summarizes the status of the transition to the test-proxy and externalized assets.
"""
    )
    parser.parse_args()

    python = generate_python_report()
    write_output(python)
    js = generate_js_report()
    write_output(js)
    go = generate_go_report()
    write_output(go)
    net = generate_net_report()
    write_output(net)
    cpp = generate_cpp_report()
    write_output(cpp)
    java = generate_java_report()
    write_output(java)

    write_summary([python, js, go, net, cpp, java])