# src/image_builder.py
def image_builder(buildspec, image_types=None, device_types=None):
"""
Builds images using build specification with specified image and device types
and export them to ECR image repository
An empty image types array indicates all image types.
Similarly, an empty device types array indicates all device types
:param buildspec: buid specification defining images to be build
:param image_types: <list> list of image types
:param device_types: <list> list of image device type
"""
BUILDSPEC = Buildspec()
BUILDSPEC.load(buildspec)
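    # Images are tracked in two stages: pre-push images are the raw builds, and common
    # stage images are layered on top of them for pushing (see the loop body below).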
PRE_PUSH_STAGE_IMAGES = []
COMMON_STAGE_IMAGES = []
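    # Builds that start from released DLC images (e.g. HuggingFace, AutoGluon) and
    # autopatch builds need to pull from the prod ECR registry, so log in up front.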
if (
"huggingface" in str(BUILDSPEC["framework"])
or "autogluon" in str(BUILDSPEC["framework"])
or "stabilityai" in str(BUILDSPEC["framework"])
or "trcomp" in str(BUILDSPEC["framework"])
or is_autopatch_build_enabled(buildspec_path=buildspec)
):
_login_to_prod_ecr_registry()
for image_name, image_config in BUILDSPEC["images"].items():
# filter by image type if type is specified
        if image_types and image_config["image_type"] not in image_types:
            continue
        # filter by device type if type is specified
        if device_types and image_config["device_type"] not in device_types:
            continue
ARTIFACTS = deepcopy(BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}
extra_build_args = {}
labels = {}
tag_override = image_config.get("build_tag_override", "False").lower() == "true"
prod_repo_uri = ""
if is_autopatch_build_enabled(buildspec_path=buildspec) or tag_override:
prod_repo_uri = utils.derive_prod_image_uri_using_image_config_from_buildspec(
image_config=image_config,
framework=BUILDSPEC["framework"],
new_account_id=constants.PUBLIC_DLC_REGISTRY,
)
FORMATTER.print(
f"""[PROD_URI for {image_config["repository"]}:{image_config["tag"]}] {prod_repo_uri}"""
)
if image_config.get("version") is not None:
if BUILDSPEC["version"] != image_config.get("version"):
continue
if image_config.get("context") is not None:
ARTIFACTS.update(image_config["context"])
image_tag = (
tag_image_with_pr_number(image_config["tag"])
if build_context == "PR"
else image_config["tag"]
)
if is_autopatch_build_enabled(buildspec_path=buildspec):
image_tag = append_tag(image_tag, "autopatch")
additional_image_tags = []
if is_nightly_build_context():
additional_image_tags.append(tag_image_with_date(image_tag))
if is_build_enabled() or build_context != "PR":
# Order appears to matter in datetime tagging, so tag with no datetime first, then
# set image_tag to have datetime
no_datetime = image_tag
additional_image_tags.append(no_datetime)
if build_context == "MAINLINE":
additional_image_tags.append(tag_image_with_initiator(no_datetime))
image_tag = tag_image_with_datetime(image_tag)
additional_image_tags.append(image_tag)
image_repo_uri = (
image_config["repository"]
if build_context == "PR"
else modify_repository_name_for_context(str(image_config["repository"]), build_context)
)
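        # A child image names another image from this buildspec as its base; resolve that
        # name to the ECR URL of the already-created pre-push image object.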
base_image_uri = None
if image_config.get("base_image_name") is not None:
base_image_object = _find_image_object(
PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"]
)
base_image_uri = base_image_object.ecr_url
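        # Download any declared artifacts, stage them into the build context, and expose
        # their filenames as Docker build args and image labels.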
if image_config.get("download_artifacts") is not None:
for artifact_name, artifact in image_config.get("download_artifacts").items():
                artifact_type = artifact["type"]  # avoid shadowing the "type" built-in
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]
                try:
                    file_name = utils.download_file(uri, artifact_type).strip()
                except ValueError:
                    FORMATTER.print(f"Artifact download failed: {uri} of type {artifact_type}.")
                    # file_name would be undefined below, so the failure must propagate
                    raise
                ARTIFACTS.update(
                    {
                        artifact_name: {
                            # os.getcwd() is already absolute, so no abspath/os.sep juggling needed
                            "source": os.path.join(os.getcwd(), file_name),
                            "target": file_name,
                        }
                    }
                )
extra_build_args[var] = file_name
labels[var] = file_name
labels[f"{var}_URI"] = uri
transformers_version = image_config.get("transformers_version")
if str(BUILDSPEC["framework"]).startswith("huggingface"):
if transformers_version:
extra_build_args["TRANSFORMERS_VERSION"] = transformers_version
else:
                raise KeyError(
                    "HuggingFace buildspec.yml must contain a 'transformers_version' field for each image"
                )
if "datasets_version" in image_config:
extra_build_args["DATASETS_VERSION"] = image_config.get("datasets_version")
elif str(image_config["image_type"]) == "training":
                raise KeyError(
                    "HuggingFace buildspec.yml must contain a 'datasets_version' field for each training image"
                )
torchserve_version = image_config.get("torch_serve_version")
inference_toolkit_version = image_config.get("tool_kit_version")
if torchserve_version:
extra_build_args["TORCHSERVE_VERSION"] = torchserve_version
if inference_toolkit_version:
extra_build_args["SM_TOOLKIT_VERSION"] = inference_toolkit_version
dockerfile = image_config["docker_file"]
target = image_config.get("target")
if tag_override and build_context == "PR":
if is_autopatch_build_enabled(buildspec_path=buildspec):
FORMATTER.print("AUTOPATCH ENABLED IN BUILDSPEC, CANNOT OVERRIDE WITH TAG, SORRY!")
else:
_login_to_prod_ecr_registry()
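                # Instead of rebuilding, generate a stub Dockerfile that pulls the released
                # prod image and records its source in a label.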
with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file_handle:
                    source_uri = prod_repo_uri
temp_file_handle.write(
f"FROM {source_uri}\nLABEL dlc.dev.source_img={source_uri}"
)
dockerfile = temp_file_handle.name
target = None
FORMATTER.print(f"USING TAG OVERRIDE {source_uri}")
ARTIFACTS.update(
{
"dockerfile": {
"source": dockerfile,
"target": "Dockerfile",
}
}
)
# job_type will be either inference or training, based on the repo URI
if "training" in image_repo_uri:
label_job_type = "training"
elif "inference" in image_repo_uri:
label_job_type = "inference"
else:
raise RuntimeError(
f"Cannot find inference or training job type in {image_repo_uri}. "
f"This is required to set job_type label."
)
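        # Render dlc_template.py with the framework, version, and job type; the output is
        # baked into the image as sitecustomize.py via the ARTIFACTS update below.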
template_file = os.path.join(
os.sep, get_cloned_folder_path(), "miscellaneous_scripts", "dlc_template.py"
)
template_fw_version = (
str(image_config["framework_version"])
if image_config.get("framework_version")
else str(BUILDSPEC["version"])
)
template_fw = str(BUILDSPEC["framework"])
post_template_file = utils.generate_dlc_cmd(
template_path=template_file,
output_path=os.path.join(image_config["root"], "out.py"),
framework=template_fw,
framework_version=template_fw_version,
container_type=label_job_type,
)
ARTIFACTS.update(
{"customize": {"source": post_template_file, "target": "sitecustomize.py"}}
)
context = Context(ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"])
if "labels" in image_config:
labels.update(image_config.get("labels"))
for label, value in labels.items():
if isinstance(value, bool):
labels[label] = str(value)
cx_type = utils.get_label_prefix_customer_type(image_tag)
# Define label variables
label_framework = str(BUILDSPEC["framework"]).replace("_", "-")
if image_config.get("framework_version"):
label_framework_version = str(image_config["framework_version"]).replace(".", "-")
else:
label_framework_version = str(BUILDSPEC["version"]).replace(".", "-")
label_device_type = str(image_config["device_type"])
if label_device_type == "gpu":
label_device_type = f"{label_device_type}.{str(image_config['cuda_version'])}"
label_arch = str(BUILDSPEC["arch_type"])
label_python_version = str(image_config["tag_python_version"])
label_os_version = str(image_config.get("os_version")).replace(".", "-")
label_contributor = str(BUILDSPEC.get("contributor"))
label_transformers_version = str(transformers_version).replace(".", "-")
if cx_type == "sagemaker":
# Adding standard labels to all images
labels[
f"com.amazonaws.ml.engines.{cx_type}.dlc.framework.{label_framework}.{label_framework_version}"
] = "true"
labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.device.{label_device_type}"] = "true"
labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.arch.{label_arch}"] = "true"
# python version label will look like py_version.py36, for example
labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.python.{label_python_version}"] = "true"
labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.os.{label_os_version}"] = "true"
labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.job.{label_job_type}"] = "true"
if label_contributor:
labels[
f"com.amazonaws.ml.engines.{cx_type}.dlc.contributor.{label_contributor}"
] = "true"
if transformers_version:
labels[
f"com.amazonaws.ml.engines.{cx_type}.dlc.lib.transformers.{label_transformers_version}"
] = "true"
if torchserve_version and inference_toolkit_version:
labels[
f"com.amazonaws.ml.engines.{cx_type}.dlc.inference-toolkit.{inference_toolkit_version}.torchserve.{torchserve_version}"
] = "true"
"""
Override parameters from parent in child.
"""
info = {
"account_id": str(BUILDSPEC["account_id"]),
"region": str(BUILDSPEC["region"]),
"framework": str(BUILDSPEC["framework"]),
"version": str(BUILDSPEC["version"]),
"root": str(image_config["root"]),
"name": str(image_name),
"device_type": str(image_config["device_type"]),
"python_version": str(image_config["python_version"]),
"image_type": str(image_config["image_type"]),
"image_size_baseline": int(image_config["image_size_baseline"]),
"base_image_uri": base_image_uri,
"enable_test_promotion": image_config.get("enable_test_promotion", True),
"labels": labels,
"extra_build_args": extra_build_args,
"cx_type": cx_type,
"release_image_uri": prod_repo_uri,
"buildspec_path": buildspec,
}
# Create pre_push stage docker object
pre_push_stage_image_object = DockerImage(
info=info,
dockerfile=dockerfile,
repository=image_repo_uri,
tag=append_tag(image_tag, "pre-push"),
to_build=image_config["build"],
stage=constants.PRE_PUSH_STAGE,
context=context,
additional_tags=additional_image_tags,
target=target,
)
        ##### Create Common stage docker object #####
        # If we create a common stage image for a pre_push stage image, we do not push the
        # pre_push stage image to the repository; only its common stage image is pushed. To
        # that end, generate_common_stage_image_object marks pre_push_stage_image_object as
        # non-pushable.
common_stage_image_object = generate_common_stage_image_object(
pre_push_stage_image_object, image_tag
)
COMMON_STAGE_IMAGES.append(common_stage_image_object)
PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object)
FORMATTER.separator()
if is_autopatch_build_enabled(buildspec_path=buildspec) and is_build_enabled():
FORMATTER.banner("APATCH-PREP")
patch_helper.initiate_multithreaded_autopatch_prep(
PRE_PUSH_STAGE_IMAGES, make_dummy_boto_client=True
)
FORMATTER.banner("DLC")
# Parent images do not inherit from any containers built in this job
# Child images use one of the parent images as their base image
parent_images = [image for image in PRE_PUSH_STAGE_IMAGES if not image.is_child_image]
child_images = [image for image in PRE_PUSH_STAGE_IMAGES if image.is_child_image]
ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES
IMAGES_TO_PUSH = [image for image in ALL_IMAGES if image.to_push and image.to_build]
pushed_images = []
pushed_images += process_images(parent_images, "Parent/Independent", buildspec_path=buildspec)
pushed_images += process_images(child_images, "Child/Dependent", buildspec_path=buildspec)
    assert all(
        image in pushed_images for image in IMAGES_TO_PUSH
    ), "Some images could not be pushed."
# After the build, display logs/summary for all the images.
FORMATTER.banner("Summary")
show_build_info(ALL_IMAGES)
FORMATTER.banner("Errors")
is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(ALL_IMAGES)
# From all images, filter the images that were supposed to be built and upload their metrics
BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build]
if BUILT_IMAGES:
FORMATTER.banner("Upload Metrics")
upload_metrics(BUILT_IMAGES, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit)
# Set environment variables to be consumed by test jobs
test_trigger_job = get_codebuild_project_name()
# Tests should only run on images that were pushed to the repository
images_to_test = IMAGES_TO_PUSH
if not is_build_enabled():
# Ensure we have images populated if do_build is false, so that tests can proceed if needed
images_to_test = [image for image in ALL_IMAGES if image.to_push]
if images_to_test:
FORMATTER.banner("Test Env")
utils.set_test_env(
images_to_test,
use_latest_additional_tag=True,
BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
TEST_TRIGGER=test_trigger_job,
)