build/ci_matrix.py (132 lines of code) (raw):
# Copyright (c) 2020 The Neuropod Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script lets us define a CI build matrix in one place
import itertools
# Template for .github/workflows/mac_ci.yml
#
# Rendered with `str.format`: the lone `{}` under `include:` receives the
# generated matrix entries, and every quadrupled brace (`{{{{` / `}}}}`) comes
# out as a doubled brace, which is how the GitHub Actions `${{ ... }}`
# expressions end up in the output.
# NOTE(review): YAML nesting is determined by the leading whitespace inside
# this literal -- confirm the rendered workflow is indented as intended.
GH_ACTIONS_TEMPLATE = """
#
# DO NOT MANUALLY EDIT THIS FILE
# AUTOGENERATED BY build/ci_matrix.py
#
name: Mac CI
on:
push:
branches:
- master
tags:
- v*
pull_request:
jobs:
build:
runs-on: macos-10.15
name: TF ${{{{ matrix.tf }}}}, Torch ${{{{ matrix.torch }}}}, Python ${{{{ matrix.python }}}}
env:
NEUROPOD_TENSORFLOW_VERSION: ${{{{ matrix.tf }}}}
NEUROPOD_TORCH_VERSION: ${{{{ matrix.torch }}}}
NEUROPOD_PYTHON_VERSION: ${{{{ matrix.python }}}}
steps:
# Checkout the repo
- uses: actions/checkout@v2
# Setup bazel cache
- name: Bazel Cache
uses: actions/cache@v2
with:
path: ~/bazel_cache
key: ${{{{ runner.os }}}}-bazel-cache-tf-${{{{ matrix.tf }}}}-torch-${{{{ matrix.torch }}}}-python-${{{{ matrix.python }}}}
# Build and test
- name: Build and Test
run: ./build/ci/gh_actions_build.sh
env:
NEUROPOD_TEST_FRAMEWORKS: ${{{{ matrix.test_frameworks }}}}
GH_UPLOAD_TOKEN: ${{{{ secrets.GITHUB_TOKEN }}}}
strategy:
matrix:
include:
{}
"""
# Template for docker-compose.test.yml
#
# Declares the two parent services (`test-base` and `test-gpu`) that the
# generated per-variant services extend. Rendered with `str.format`: the
# trailing `{}` is replaced by those generated service definitions.
# NOTE(review): YAML nesting is determined by the leading whitespace inside
# this literal -- confirm the rendered compose file is indented as intended.
DOCKER_COMPOSE_TEST_TEMPLATE = """
#
# DO NOT MANUALLY EDIT THIS FILE
# AUTOGENERATED BY build/ci_matrix.py
#
version: '2.3'
services:
test-base:
build:
context: .
dockerfile: build/neuropod.dockerfile
target: neuropod-base
test-gpu:
extends: test-base
build:
args:
NEUROPOD_IS_GPU: "true"
runtime: nvidia
{}
"""
# Template for .buildkite/pipeline.yml
#
# Rendered with `str.format`: the `{}` after `steps:` is replaced by the
# generated pipeline steps (one per Linux variant, plus a single lint step).
BUILDKITE_YML_TEMPLATE = """
#
# DO NOT MANUALLY EDIT THIS FILE
# AUTOGENERATED BY build/ci_matrix.py
#
steps:
{}
"""
# Every CI target the build matrix is expanded over.
# The generation loop matches on substrings of these names: "macos" selects
# GitHub Actions, "linux" selects docker-compose + Buildkite, and "gpu"
# selects the GPU flavour of a Linux build.
PLATFORMS = [
    "linux_cpu",
    "linux_gpu",
    "macos_cpu",
]
# Framework/toolchain combinations to test; each one is built on every platform.
#
# Required keys: "cuda", "tensorflow", "torch", "python"
#   ("cuda" is only emitted for GPU builds).
# Optional keys:
#   "cudnn"           - only emitted for GPU builds, and only when present
#   "test_frameworks" - comma-separated frameworks to test; when omitted, the
#                       generation loop falls back to "tensorflow,torchscript,python"
FRAMEWORK_VERSIONS = [
    {"cuda": "10.0", "tensorflow": "1.14.0", "torch": "1.3.0", "python": "3.6"},
    {"cuda": "10.0", "tensorflow": "1.15.0", "torch": "1.4.0", "python": "3.7"},
    {"cuda": "10.1", "tensorflow": "2.2.0", "torch": "1.5.0", "python": "3.8"},

    # TensorFlow 2.2.0 on py3.8 is already covered above, so skip its tests here
    {"cuda": "10.1", "tensorflow": "2.2.0", "torch": "1.6.0", "python": "3.8", "test_frameworks": "torchscript,python"},
    {"cuda": "10.1", "tensorflow": "2.2.0", "torch": "1.7.0", "python": "3.8", "test_frameworks": "torchscript,python"},
    {"cuda": "10.2", "tensorflow": "2.2.0", "torch": "1.8.1", "python": "3.8", "test_frameworks": "torchscript,python"},
    {"cuda": "10.2", "tensorflow": "2.2.0", "torch": "1.9.0", "python": "3.8", "test_frameworks": "torchscript,python"},
    {"cuda": "10.2", "tensorflow": "2.2.0", "torch": "1.10.2", "python": "3.8", "test_frameworks": "torchscript,python"},

    # TensorFlow-only runs
    {"cuda": "11.2.1", "cudnn": "8", "tensorflow": "2.5.0", "torch": "1.7.0", "python": "3.8", "test_frameworks": "tensorflow"},
    {"cuda": "11.2.1", "cudnn": "8", "tensorflow": "2.6.2", "torch": "1.7.0", "python": "3.8", "test_frameworks": "tensorflow"},
]
# Accumulators for the generated config fragments. Each one is a list of text
# lines that is later joined and substituted into the matching template.
gh_actions_matrix = []
docker_compose_matrix = []
buildkite_yml_matrix = []

# The lint step is emitted exactly once, alongside the first Linux CPU variant
added_lint = False

# NOTE(review): the leading whitespace inside the string literals below sets the
# YAML nesting of the generated files -- confirm it against the checked-in configs.
for platform, framework_version in itertools.product(PLATFORMS, FRAMEWORK_VERSIONS):
    # Get versions of all the dependencies
    tf_version = framework_version["tensorflow"]
    torch_version = framework_version["torch"]
    py_version = framework_version["python"]

    # Which frameworks we want to run tests for (all of them unless overridden)
    # TODO(vip): do this better
    test_frameworks = framework_version.get("test_frameworks", "tensorflow,torchscript,python")

    # Generate the appropriate configuration
    if "macos" in platform:
        # This is a GH Actions build
        gh_actions_matrix.extend([
            " - tf: {}\n".format(tf_version),
            " torch: {}\n".format(torch_version),
            " python: {}\n".format(py_version),
            " test_frameworks: {}\n".format(test_frameworks),
            "\n",
        ])

    elif "linux" in platform:
        is_gpu = "gpu" in platform

        # Service name shared by docker-compose and Buildkite; dots in the
        # version numbers are replaced with underscores
        variant_name = "test-{}-variant-tf-{}-torch-{}-py{}".format("gpu" if is_gpu else "cpu", tf_version, torch_version, py_version).replace(".", "_")

        # docker-compose service for this variant. The CUDA/cuDNN build args
        # collapse to empty strings for CPU builds (and when no cuDNN version
        # is specified), so they contribute nothing to the output.
        docker_compose_matrix.extend([
            " {}:\n".format(variant_name),
            " extends: test-{}\n".format("gpu" if is_gpu else "base"),
            " build:\n",
            " args:\n",
            " NEUROPOD_CUDA_VERSION: {}\n".format(framework_version["cuda"]) if is_gpu else "",
            " NEUROPOD_CUDNN_VERSION: {}\n".format(framework_version["cudnn"]) if is_gpu and "cudnn" in framework_version else "",
            " NEUROPOD_TENSORFLOW_VERSION: {}\n".format(tf_version),
            " NEUROPOD_TORCH_VERSION: {}\n".format(torch_version),
            " NEUROPOD_PYTHON_VERSION: {}\n".format(py_version),
            "\n",
        ])

        # Buildkite docker-compose plugin configuration (build, push, run) for
        # this variant; reused by the test step and, once, by the lint step
        plugin_config = [
            " plugins:\n",
            " - docker-compose#v3.7.0:\n",
            " build: {}\n".format(variant_name),
            " config: docker-compose.test.yml\n",
            " image-repository: 027047743804.dkr.ecr.us-east-2.amazonaws.com/uber\n",
            " cache-from: {}:027047743804.dkr.ecr.us-east-2.amazonaws.com/uber:{}\n".format(variant_name, variant_name),
            " push-retries: 5\n",
            " - docker-compose#v3.7.0:\n",
            " push: {}:027047743804.dkr.ecr.us-east-2.amazonaws.com/uber:{}\n".format(variant_name, variant_name),
            " config: docker-compose.test.yml\n",
            " - docker-compose#v3.7.0:\n",
            " run: {}\n".format(variant_name),
            " config: docker-compose.test.yml\n",
            " env:\n",
            " - NEUROPOD_CACHE_ACCESS_KEY\n",
            " - NEUROPOD_CACHE_ACCESS_SECRET\n",
            " - BUILDKITE\n",
            " - BUILDKITE_BRANCH\n",
            " - BUILDKITE_BUILD_NUMBER\n",
            " - BUILDKITE_BUILD_URL\n",
            " - BUILDKITE_COMMIT\n",
            " - BUILDKITE_JOB_ID\n",
            " - BUILDKITE_PROJECT_SLUG\n",
            " - BUILDKITE_PULL_REQUEST\n",
            " - BUILDKITE_TAG\n",
            " - CI\n",
            " - CODECOV_TOKEN\n",
            " - GH_STATUS_TOKEN\n",
            " - GH_UPLOAD_TOKEN\n",
            " - NEUROPOD_TEST_FRAMEWORKS\n",
            " - WEB_DEPLOY_KEY\n",
            " retry:\n",
            " automatic: true\n",
            "\n",
        ]

        # Buildkite test step for this variant
        buildkite_yml_matrix.extend([
            " - label: \":docker: {} Tests ({})\"\n".format("GPU" if is_gpu else "CPU", variant_name),
            " timeout_in_minutes: 60\n",
            " agents:\n",
            " queue: {}\n".format("public-gpu" if is_gpu else "public-gpu"),  # Temporarily run everything on `public-gpu`
            " env:\n",
            " NEUROPOD_TEST_FRAMEWORKS: {}\n".format(test_frameworks),
            " command: build/ci/{}.sh\n".format("buildkite_build_gpu" if is_gpu else "buildkite_build"),
        ] + plugin_config)

        if not is_gpu and not added_lint:
            # Add a lint job to our build matrix (reuses this variant's image)
            added_lint = True
            buildkite_yml_matrix.extend([
                # The label has no placeholders, so it is a plain literal
                " - label: \":docker: Lint + Docs\"\n",
                " timeout_in_minutes: 60\n",
                " agents:\n",
                " queue: public-gpu\n",  # Temporarily run everything on `public-gpu`
                " command: build/ci/buildkite_lint.sh\n",
            ] + plugin_config)
# Render the final file contents: each template has one positional `{}`
# placeholder that receives the concatenation of its generated fragments
GH_ACTIONS_YML, DOCKER_COMPOSE_TEST, BUILDKITE_YML = (
    template.format("".join(fragments))
    for template, fragments in (
        (GH_ACTIONS_TEMPLATE, gh_actions_matrix),
        (DOCKER_COMPOSE_TEST_TEMPLATE, docker_compose_matrix),
        (BUILDKITE_YML_TEMPLATE, buildkite_yml_matrix),
    )
)
if __name__ == '__main__':
    # Command-line entry point.
    #
    # With `--write-config`, (over)write every generated CI config file.
    # Without it, compare each file on disk against the generated content and
    # raise ValueError on the first mismatch.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--write-config',
        # Adjacent literals are concatenated verbatim, so each sentence needs
        # its own trailing separator
        help=('Actually write all the CI config files (e.g. pipeline.yml, mac_ci.yml, docker-compose.test.yml). '
              'Otherwise, just verify that the files match the build matrix defined here. '
              'Default False.'),
        default=False,
        action='store_true',
    )
    args = parser.parse_args()

    # Output path -> generated content. Paths are relative to the current
    # working directory (presumably the repository root -- verify when invoking
    # from elsewhere).
    files = {
        './docker-compose.test.yml': DOCKER_COMPOSE_TEST,
        './.github/workflows/mac_ci.yml': GH_ACTIONS_YML,
        './.buildkite/pipeline.yml': BUILDKITE_YML,
    }

    if args.write_config:
        for path, content in files.items():
            with open(path, 'w') as f:
                f.write(content)
    else:
        # Just verify that everything matches
        for path, target_content in files.items():
            with open(path, 'r') as f:
                content = f.read()

            if content != target_content:
                raise ValueError("{} does not match current build matrix! Please run `./build/ci_matrix.py --write-config` to fix.".format(path))