sagemaker_image_builder/utils.py (83 lines of code) (raw):

import json
import os
import pathlib

import conda.cli.python_api
from conda.env.specs import RequirementsSpec
from conda.models.match_spec import MatchSpec
from semver import Version


def get_dir_for_version(version: Version) -> str:
    """Return the relative build-artifacts directory for ``version``.

    The layout is ``build_artifacts/v<major>/v<major>.<minor>/v<major>.<minor>.<patch>``;
    pre-release versions get one extra nested level named
    ``v<major>.<minor>.<patch>-<prerelease>``.
    """
    patch_dir = f"v{version.major}.{version.minor}.{version.patch}"
    version_prerelease_suffix = f"/{patch_dir}-{version.prerelease}" if version.prerelease else ""
    return os.path.relpath(
        f"build_artifacts/v{version.major}/v{version.major}.{version.minor}/"
        f"{patch_dir}{version_prerelease_suffix}"
    )


def is_exists_dir_for_version(version: Version, file_name_to_verify_existence="Dockerfile") -> bool:
    """Return True if the directory for ``version`` exists and contains the given file.

    The directory alone is not sufficient evidence: it may have been created only
    because of pre-release builds or additional packages. Checking for a file inside
    it (``Dockerfile`` by default; callers may pass e.g. an env in/out file name)
    confirms it holds real artifacts for this version.
    """
    dir_path = get_dir_for_version(version)
    return os.path.exists(dir_path) and os.path.exists(dir_path + "/" + file_name_to_verify_existence)


def get_semver(version_str) -> Version:
    """Parse a conda-forge style version string into a ``semver.Version``.

    Raises:
        ValueError: propagated from ``Version.parse`` when the normalized string is
            still not valid SemVer.
        Exception: when the parsed version carries build metadata.
    """
    # Version strings on conda-forge follow PEP standards rather than SemVer, which support
    # version strings such as X.Y.Z.postN, X.Y.Z.preN. These cause errors in
    # semver.Version.parse, so only the first 3 components are kept.
    if version_str.count(".") > 2:
        version_str = ".".join(version_str.split(".")[:3])
    # If the version string doesn't include the patch version number, assume it's 0.
    if version_str.count(".") == 1:
        version_str = f"{version_str}.0"
    version = Version.parse(version_str)
    if version.build is not None:
        # The exception type is kept as plain Exception for backward compatibility;
        # only the message is new.
        raise Exception(f"Build metadata is not supported in version string: {version_str!r}")
    return version


def read_env_file(file_path) -> RequirementsSpec:
    """Wrap the env file at ``file_path`` in a conda ``RequirementsSpec``."""
    return RequirementsSpec(filename=file_path)


def get_match_specs(file_path) -> dict[str, MatchSpec]:
    """Return the conda dependencies listed in ``file_path`` keyed by package name.

    Returns an empty dict when ``file_path`` is not an existing file.

    Raises:
        AssertionError: when the parsed environment does not consist of exactly one
            dependency group named ``"conda"``.
    """
    if not os.path.isfile(file_path):
        return {}
    dependencies = read_env_file(file_path).environment.dependencies
    assert len(dependencies) == 1, f"Expected exactly one dependency group in {file_path}"
    assert "conda" in dependencies, f"Expected a 'conda' dependency group in {file_path}"
    # Build each MatchSpec once and reuse it for both the key and the value.
    match_specs = (MatchSpec(dependency) for dependency in dependencies["conda"])
    return {match_spec.get("name"): match_spec for match_spec in match_specs}


def sizeof_fmt(num):
    """Convert a byte count into a human-readable string using 1024-based units."""
    for unit in ("B", "KB", "MB", "GB"):
        if abs(num) < 1024.0:
            return f"{num:3.2f}{unit}"
        num /= 1024.0
    # Anything that is still >= 1024 GB is reported in TB.
    return f"{num:.2f}TB"


def create_markdown_table(headers, rows):
    """Build a markdown table and return it as a multi-line string.

    headers -- A list of strings, each string represents a column name
    rows -- A list of dicts, each dict is a row; values are emitted in the dict's
            iteration order and the keys are not matched against ``headers``
    """
    header_line = " | ".join(headers)
    # One "---" cell per header, separated by pipes.
    separator_line = "---|" * (len(headers) - 1) + "---"
    lines = [header_line, separator_line]
    lines.extend("|".join(str(value) for value in row.values()) for row in rows)
    # Every line, including the last one, is newline-terminated.
    return "".join(line + "\n" for line in lines)


def dump_conda_package_metadata(args):
    """Print version and size of every package recorded in ``$CONDA_PREFIX/conda-meta`` as JSON.

    Packages are ordered by size, largest first. When ``args.human_readable`` is
    truthy, sizes are rendered with ``sizeof_fmt`` instead of raw byte counts.

    Raises:
        KeyError: when ``CONDA_PREFIX`` is not set, or a record lacks ``version``/``size``.
    """
    prefix = os.environ["CONDA_PREFIX"]
    meta_data_path = pathlib.Path(prefix) / "conda-meta"
    meta_data = {}
    for meta_data_file in meta_data_path.glob("*.json"):
        with open(meta_data_file, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        # Splitting the file name on the first "-" truncates hyphenated package names
        # (e.g. "python-dateutil" -> "python") and makes distinct packages overwrite
        # each other. Prefer the name stored in the record; otherwise strip the
        # trailing "-<version>-<build>" from the file stem.
        name = metadata.get("name") or meta_data_file.stem.rsplit("-", 2)[0]
        meta_data[name] = {"version": metadata["version"], "size": metadata["size"]}
    # Sort the package sizes in decreasing order
    meta_data = dict(sorted(meta_data.items(), key=lambda item: item[1]["size"], reverse=True))
    if args.human_readable:
        meta_data = {k: {"version": v["version"], "size": sizeof_fmt(v["size"])} for k, v in meta_data.items()}
    print(json.dumps(meta_data))


def pull_conda_package_metadata(image_config, image_artifact_dir):
    """Look up version and size for the conda-forge packages of an image via ``conda search``.

    The packages are read from the env-out file named by
    ``image_config["env_out_filename"]`` inside ``image_artifact_dir``. Only match
    specs whose string form starts with ``conda-forge`` are queried.

    Returns:
        A dict mapping package name to ``{"version": ..., "size": ...}``, ordered by
        size, largest first.
    """
    results = {}
    env_out_file_name = image_config["env_out_filename"]
    match_specs = get_match_specs(image_artifact_dir + "/" + env_out_file_name)
    for package, match_spec in match_specs.items():
        if not str(match_spec).startswith("conda-forge"):
            continue
        # Pull package metadata from conda-forge; the first element of the command
        # result is parsed as the JSON search output.
        search_result = conda.cli.python_api.run_command("search", str(match_spec), "--json")
        # Only the first entry reported for the package is used.
        package_metadata = json.loads(search_result[0])[package][0]
        results[package] = {"version": package_metadata["version"], "size": package_metadata["size"]}
    # Sort the package sizes in decreasing order
    return dict(sorted(results.items(), key=lambda item: item[1]["size"], reverse=True))