ci/detect-changes.py (286 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import print_function

import functools
import os
import pprint
import re
import sys
import subprocess


# Diagnostics go to stderr: stdout carries the shell snippet that the
# caller is expected to evaluate (see the __main__ section).
perr = functools.partial(print, file=sys.stderr)


def dump_env_vars(prefix, pattern=None):
    """
    Print to stderr the environment variables whose name starts with
    *prefix* and, if *pattern* is given, also matches that regular
    expression (using `re.search`).  Variables are listed in sorted order.
    """
    for name in sorted(os.environ):
        if not name.startswith(prefix):
            continue
        if pattern is not None and not re.search(pattern, name):
            continue
        perr("- {0}: {1!r}".format(name, os.environ[name]))


def run_cmd(cmdline):
    """
    Run the command given as an argument list and return its raw stdout
    (bytes).

    Raises RuntimeError, including the decoded stderr, if the command
    exits with a non-zero status.
    """
    proc = subprocess.Popen(
        cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(
            "Command {cmdline} failed with code {returncode}, "
            "stderr was:\n{stderr}\n".format(
                cmdline=cmdline,
                returncode=proc.returncode,
                # Decode leniently: a UnicodeDecodeError here would hide
                # the actual failure and would not be caught by callers
                # that handle RuntimeError.
                stderr=err.decode("utf-8", "replace"),
            )
        )
    return out


def get_commit_description(commit):
    """
    Return the textual description (title + body) of the given git commit.
    """
    out = run_cmd(["git", "show", "--no-patch", "--pretty=format:%B", commit])
    return out.decode("utf-8", "ignore")


def list_affected_files(commit_range):
    """
    Return a list of files changed by the given git commit range.
    """
    perr("Getting affected files from", repr(commit_range))
    out = run_cmd(["git", "diff", "--name-only", commit_range])
    stripped = (line.strip() for line in out.decode().splitlines())
    # Drop blank lines
    return [name for name in stripped if name]


def _get_pr_commit_range(base_branch):
    """
    Return the "<merge-base>..HEAD" commit range of a pull request whose
    base branch on `origin` is *base_branch*.
    """
    # Re-fetch PR base branch (e.g. origin/master), pointing FETCH_HEAD to it
    run_cmd(
        [
            "git",
            "fetch",
            "-q",
            "origin",
            "+refs/heads/{0}".format(base_branch),
        ]
    )
    # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head)
    merge_base = (
        run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"]).decode().strip()
    )
    return "{0}..HEAD".format(merge_base)


def get_travis_head_commit():
    """
    Return the commit id stored in the TRAVIS_COMMIT environment variable.
    """
    return os.environ["TRAVIS_COMMIT"]


def get_travis_commit_range():
    """
    Return the git commit range to inspect for the current Travis build.
    """
    if os.environ["TRAVIS_EVENT_TYPE"] == "pull_request":
        # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain
        # unrelated changes.  Instead, use the same strategy as on AppVeyor:
        # diff against the merge base with the PR base branch.
        return _get_pr_commit_range(os.environ["TRAVIS_BRANCH"])
    else:
        cr = os.environ["TRAVIS_COMMIT_RANGE"]
        # See
        # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122
        return cr.replace("...", "..")


def get_travis_commit_description():
    """
    Return the commit message stored in TRAVIS_COMMIT_MESSAGE.
    """
    # Prefer this to get_commit_description(get_travis_head_commit()),
    # as rebasing or other repository events may make TRAVIS_COMMIT invalid
    # at the time we inspect it
    return os.environ["TRAVIS_COMMIT_MESSAGE"]


def list_travis_affected_files():
    """
    Return a list of files affected in the current Travis build.
    """
    commit_range = get_travis_commit_range()
    try:
        return list_affected_files(commit_range)
    except RuntimeError:
        # TRAVIS_COMMIT_RANGE can contain invalid revisions when
        # building a branch (not a PR) after rebasing:
        # https://github.com/travis-ci/travis-ci/issues/2668
        if os.environ["TRAVIS_EVENT_TYPE"] == "pull_request":
            raise
        # If it's a rebase, it's probably enough to use the last commit only
        commit_range = "{0}^..".format(get_travis_head_commit())
        return list_affected_files(commit_range)


def list_appveyor_affected_files():
    """
    Return a list of files affected in the current AppVeyor build.
    This only works for PR builds.
    """
    # Compute changed files between the PR merge base and HEAD
    return list_affected_files(
        _get_pr_commit_range(os.environ["APPVEYOR_REPO_BRANCH"])
    )


def list_github_actions_affected_files():
    """
    Return a list of files affected in the current GitHub Actions build.
    """
    # GitHub Actions checkout `refs/remotes/pull/$PR/merge` where `HEAD` points
    # to the merge commit while `HEAD^` points to the commit before.  Hence,
    # `HEAD^..` covers the changes that the PR brings on top of its base.
    return list_affected_files("HEAD^..")


LANGUAGE_TOPICS = [
    "c_glib",
    "cpp",
    "docs",
    "go",
    "java",
    "js",
    "python",
    "r",
    "ruby",
    "rust",
    "csharp",
]

ALL_TOPICS = LANGUAGE_TOPICS + ["integration", "dev"]

# Maps a top-level path component to the topics that must also be
# considered affected when something under that component changes.
AFFECTED_DEPENDENCIES = {
    "java": ["integration", "python"],
    "js": ["integration"],
    "ci": ALL_TOPICS,
    "cpp": ["python", "c_glib", "r", "ruby", "integration"],
    "format": LANGUAGE_TOPICS,
    "go": ["integration"],
    ".travis.yml": ALL_TOPICS,
    "appveyor.yml": ALL_TOPICS,
    # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in
    # practice it's going to be CI
    ".github": ALL_TOPICS,
    "c_glib": ["ruby"],
}

# Top-level path components that are themselves topics.
COMPONENTS = {
    "cpp",
    "java",
    "c_glib",
    "r",
    "ruby",
    "integration",
    "js",
    "rust",
    "csharp",
    "go",
    "docs",
    "python",
    "dev",
}


def _split_path(path):
    """
    Split *path* into its list of components, outermost first.
    """
    parts = []
    head = path
    while head:
        new_head, tail = os.path.split(head)
        if new_head == head:
            # Reached a root such as "/": os.path.split() would return the
            # same head forever, so stop instead of looping endlessly.
            break
        parts.append(tail)
        head = new_head
    parts.reverse()
    return parts


def _transitive_dependents(component):
    """
    Return the set of topics reachable from *component* by following
    AFFECTED_DEPENDENCIES, directly or indirectly.
    """
    dependents = set()
    visited = set()
    pending = [component]
    while pending:
        current = pending.pop()
        if current in visited:
            # Already expanded; this is also what terminates circular
            # dependencies.
            continue
        # Mark *before* expanding so that a cycle leading back to this
        # node is not followed again.
        visited.add(current)
        for topic in AFFECTED_DEPENDENCIES.get(current, ()):
            dependents.add(topic)
            pending.append(topic)
    return dependents


def get_affected_topics(affected_files):
    """
    Return a dict of topics affected by the given files.
    Each dict value is True if affected, False otherwise.

    A file affects the topic named by its first path component (if that
    component is in COMPONENTS) plus every topic transitively listed for
    that component in AFFECTED_DEPENDENCIES.  Files whose basename starts
    with "README" are ignored.
    """
    affected = dict.fromkeys(ALL_TOPICS, False)

    for path in affected_files:
        parts = _split_path(path)
        if not parts:
            # Nothing to classify (empty path or a bare root)
            continue
        p = parts[0]
        fn = parts[-1]
        if fn.startswith("README"):
            continue

        if p in COMPONENTS:
            affected[p] = True

        for topic in _transitive_dependents(p):
            affected[topic] = True

    return affected


def make_env_for_topics(affected):
    """
    Return a dict mapping "ARROW_CI_<TOPIC>_AFFECTED" to "1" or "0" for
    each topic in *affected*, depending on the truthiness of its value.
    """
    return {
        "ARROW_CI_{0}_AFFECTED".format(k.upper()): "1" if v else "0"
        for k, v in affected.items()
    }


def get_unix_shell_eval(env):
    """
    Return a shell-evalable string to setup some environment variables
    (semicolon-separated `export NAME='value'` statements).
    """
    return "; ".join(("export {0}='{1}'".format(k, v) for k, v in env.items()))


def get_windows_shell_eval(env):
    """
    Return a shell-evalable string to setup some environment variables
    (one `set "NAME=value"` statement per line).
    """
    return "\n".join(('set "{0}={1}"'.format(k, v) for k, v in env.items()))


def run_from_travis():
    """
    Compute the affected topics for a Travis build and return the
    corresponding Unix shell snippet.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars("TRAVIS_", "(BRANCH|COMMIT|PULL)")
    if (
        os.environ["TRAVIS_REPO_SLUG"] == "apache/arrow"
        and os.environ["TRAVIS_BRANCH"] == "master"
        and os.environ["TRAVIS_EVENT_TYPE"] != "pull_request"
    ):
        # Never skip anything on master builds in the official repository
        affected = dict.fromkeys(ALL_TOPICS, True)
    else:
        desc = get_travis_commit_description()
        if "[skip travis]" in desc:
            # Skip everything
            affected = dict.fromkeys(ALL_TOPICS, False)
        elif "[force ci]" in desc or "[force travis]" in desc:
            # Test everything
            affected = dict.fromkeys(ALL_TOPICS, True)
        else:
            # Test affected topics
            affected_files = list_travis_affected_files()
            perr("Affected files:", affected_files)
            affected = get_affected_topics(affected_files)
            assert set(affected) <= set(ALL_TOPICS), affected

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_unix_shell_eval(make_env_for_topics(affected))


def run_from_appveyor():
    """
    Compute the affected topics for an AppVeyor build and return the
    corresponding Windows shell snippet.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars("APPVEYOR_", "(PULL|REPO)")
    if not os.environ.get("APPVEYOR_PULL_REQUEST_HEAD_COMMIT"):
        # Not a PR build, test everything
        affected = dict.fromkeys(ALL_TOPICS, True)
    else:
        affected_files = list_appveyor_affected_files()
        perr("Affected files:", affected_files)
        affected = get_affected_topics(affected_files)
        assert set(affected) <= set(ALL_TOPICS), affected

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_windows_shell_eval(make_env_for_topics(affected))


def run_from_github():
    """
    Compute the affected topics for a GitHub Actions build and return the
    corresponding Unix shell snippet.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars(
        "GITHUB_", "(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)"
    )
    if os.environ["GITHUB_EVENT_NAME"] != "pull_request":
        # Not a PR build, test everything
        affected = dict.fromkeys(ALL_TOPICS, True)
    else:
        affected_files = list_github_actions_affected_files()
        perr("Affected files:", affected_files)
        affected = get_affected_topics(affected_files)
        assert set(affected) <= set(ALL_TOPICS), affected

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_unix_shell_eval(make_env_for_topics(affected))


def test_get_affected_topics():
    affected_topics = get_affected_topics(["cpp/CMakeLists.txt"])
    assert affected_topics == {
        "c_glib": True,
        "cpp": True,
        "docs": False,
        "go": False,
        "java": False,
        "js": False,
        "python": True,
        "r": True,
        "ruby": True,
        "rust": False,
        "csharp": False,
        "integration": True,
        "dev": False,
    }

    affected_topics = get_affected_topics(["format/Schema.fbs"])
    assert affected_topics == {
        "c_glib": True,
        "cpp": True,
        "docs": True,
        "go": True,
        "java": True,
        "js": True,
        "python": True,
        "r": True,
        "ruby": True,
        "rust": True,
        "csharp": True,
        "integration": True,
        "dev": False,
    }

    affected_topics = get_affected_topics([".github/workflows"])
    assert affected_topics == {
        "c_glib": True,
        "cpp": True,
        "docs": True,
        "go": True,
        "java": True,
        "js": True,
        "python": True,
        "r": True,
        "ruby": True,
        "rust": True,
        "csharp": True,
        "integration": True,
        "dev": True,
    }


def _print_shell_eval(runner):
    """
    Print the shell snippet returned by *runner*; if it raises, print
    "exit 1" before re-raising.
    """
    try:
        print(runner())
    except Exception:
        # Make sure the enclosing eval will return an error
        print("exit 1")
        raise


if __name__ == "__main__":
    # This script should have its output evaluated by a shell,
    # e.g. "eval `python ci/detect-changes.py`"

    if os.environ.get("TRAVIS"):
        _print_shell_eval(run_from_travis)
    elif os.environ.get("APPVEYOR"):
        _print_shell_eval(run_from_appveyor)
    elif os.environ.get("GITHUB_WORKFLOW"):
        _print_shell_eval(run_from_github)
    else:
        sys.exit(
            "Script must be run under Travis-CI, AppVeyor or GitHub Actions"
        )