cloudbuild-merge.yaml (77 lines of code) (raw):

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cloud Build pipeline that:
#   1. checks that deploy/helm/health_runner/version.txt is non-empty,
#   2. builds every health-check image for every machine type,
#   3. builds the health-runner image,
#   4. runs the scanvola tool against the health-runner image,
#   5. lists all 21 images under `images` so Cloud Build pushes them.
#
# Escaping convention used in the inline scripts below:
#   * `$NAME` / `${NAME}`   -> Cloud Build substitution, resolved by Cloud
#                              Build before the step runs.
#   * `$$name` / `$${name}` -> shell variable, passed to the shell as
#                              `$name` / `${name}`.
# Every shell variable is written with `$$` so the two kinds cannot be
# confused. Do not put a single `$` followed by an upper-case name inside a
# script (even in a shell comment) unless it is meant as a substitution.

substitutions:
  _HEALTH_CHECK_REPOSITORY_BASE: us-docker.pkg.dev/gce-ai-infra/health-check

steps:
  # Fail the build early when the version file is missing or empty.
  #
  # NOTE(review): this step only validates and logs the version. Image tags
  # in the later steps and in `images` use the `$REF_NAME` substitution, not
  # the value read here. Shell variables do not survive past the end of a
  # step, so the former `REF_NAME=...` assignment at the end of this script
  # had no effect and was removed. Confirm that tagging by `$REF_NAME` rather
  # than by the contents of version.txt is the intended behaviour.
  - name: 'alpine'
    entrypoint: sh
    args:
      - -c
      - |
        set -e
        _VERSION=$(cat deploy/helm/health_runner/version.txt)
        [ -n "$$_VERSION" ] || { echo "ERROR: _VERSION is empty or not set"; exit 1; }
        echo "Version found to be: $$_VERSION"
    id: get-version

  # Build and tag healthchecks
  # (one image per health check / machine type combination).
  - name: 'gcr.io/cloud-builders/docker'
    entrypoint: bash
    args:
      - -c
      - |
        set -e
        health_check_names=(
          "gpu-healthcheck"
          "nccl-healthcheck"
          "neper-healthcheck"
          "straggler-healthcheck"
          "tinymax-healthcheck"
        )
        machine_types=(
          "a3-highgpu-8g"
          "a3-megagpu-8g"
          "a3-ultragpu-8g"
          "a4-highgpu-8g"
        )
        for health_check_name in "$${health_check_names[@]}"; do
          for machine_type in "$${machine_types[@]}"; do
            echo "Building $${health_check_name} for $${machine_type} with version $REF_NAME..."
            # The NCCL check on a3-ultragpu-8g has its own Dockerfile; every
            # other combination uses docker/<name with '-' replaced by '_'>.
            if [[ "$$machine_type" == "a3-ultragpu-8g" && "$$health_check_name" == "nccl-healthcheck" ]]; then
              dockerfile="docker/nccl_healthcheck_a3ultra.Dockerfile"
            else
              dockerfile="docker/$${health_check_name//-/_}.Dockerfile"
            fi
            latest_tag="$_HEALTH_CHECK_REPOSITORY_BASE/$${health_check_name}:$${machine_type}_$REF_NAME"
            echo "Building with tag: $${latest_tag} and Dockerfile: $${dockerfile}"
            docker build -t "$${latest_tag}" -f "$${dockerfile}" .
            echo "Successfully built image: $${latest_tag}"
          done
        done
    id: build-healthchecks
    waitFor: ['get-version']

  # Build health-runner
  # (waits only on get-version, like build-healthchecks).
  - name: 'gcr.io/cloud-builders/docker'
    entrypoint: bash
    args:
      - -c
      - |
        set -e
        _BUILD_TARGET="health_runner"
        latest_tag="$_HEALTH_CHECK_REPOSITORY_BASE/health-runner:$REF_NAME"
        echo "Building $$_BUILD_TARGET with version $REF_NAME..."
        docker build -t "$${latest_tag}" -f "docker/$$_BUILD_TARGET.Dockerfile" .
        echo "Successfully built image: $${latest_tag}"
    id: build-health-runner
    waitFor: ['get-version']

  # Run scanvola against the freshly built health-runner image. There is no
  # `waitFor`, so this step waits for all previous steps to finish.
  # NOTE(review): only the health-runner image is passed to scanvola; the
  # health-check images are not. Confirm that this is intentional.
  - name: 'us-docker.pkg.dev/scaevola-builder-integration/release/scanvola/scanvola'
    args: ['--image=$_HEALTH_CHECK_REPOSITORY_BASE/health-runner:$REF_NAME']

options:
  # See: https://cloud.google.com/build/docs/securing-builds/view-build-provenance#req-metadata
  requestedVerifyOption: VERIFIED

# List ALL images to be pushed by Cloud Build after successful builds.
# Keep this list in sync with the health_check_names and machine_types
# arrays in the build-healthchecks step.
images:
  # Healthchecks (5 checks * 4 machine types = 20 images)
  - '$_HEALTH_CHECK_REPOSITORY_BASE/gpu-healthcheck:a3-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/gpu-healthcheck:a3-megagpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/gpu-healthcheck:a3-ultragpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/gpu-healthcheck:a4-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/nccl-healthcheck:a3-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/nccl-healthcheck:a3-megagpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/nccl-healthcheck:a3-ultragpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/nccl-healthcheck:a4-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/neper-healthcheck:a3-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/neper-healthcheck:a3-megagpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/neper-healthcheck:a3-ultragpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/neper-healthcheck:a4-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/straggler-healthcheck:a3-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/straggler-healthcheck:a3-megagpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/straggler-healthcheck:a3-ultragpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/straggler-healthcheck:a4-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/tinymax-healthcheck:a3-highgpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/tinymax-healthcheck:a3-megagpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/tinymax-healthcheck:a3-ultragpu-8g_$REF_NAME'
  - '$_HEALTH_CHECK_REPOSITORY_BASE/tinymax-healthcheck:a4-highgpu-8g_$REF_NAME'
  # Health-runner (1 image)
  - '$_HEALTH_CHECK_REPOSITORY_BASE/health-runner:$REF_NAME'