ci/detect-changes.py (286 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import functools
import os
import pprint
import re
import sys
import subprocess
# Print-like helper that writes to stderr.  Diagnostics go through it so that
# stdout only carries the shell snippet printed by the __main__ section.
perr = functools.partial(print, file=sys.stderr)
def dump_env_vars(prefix, pattern=None):
    """
    Print selected environment variables to stderr, sorted by name.

    A variable is printed when its name starts with *prefix* and, if
    *pattern* is given, `re.search(pattern, name)` finds a match.
    """
    for var_name in sorted(os.environ):
        if not var_name.startswith(prefix):
            continue
        if pattern is not None and not re.search(pattern, var_name):
            continue
        perr("- {0}: {1!r}".format(var_name, os.environ[var_name]))
def run_cmd(cmdline):
    """
    Run the command given as an argument list and return its stdout.

    The output is returned as raw bytes; callers decode it themselves.

    Raises RuntimeError if the command exits with a non-zero status.  The
    message contains the command line, the exit code and the captured
    stderr.
    """
    proc = subprocess.Popen(
        cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(
            "Command {cmdline} failed with code {returncode}, "
            "stderr was:\n{stderr}\n".format(
                cmdline=cmdline,
                returncode=proc.returncode,
                # Decode leniently: stderr that is not valid UTF-8 must not
                # turn the RuntimeError callers catch into a
                # UnicodeDecodeError.
                stderr=err.decode("utf-8", "replace"),
            )
        )
    return out
def get_commit_description(commit):
    """
    Return the full message (title + body) of the given git commit as text.

    Bytes that are not valid UTF-8 are dropped from the result.
    """
    show_cmd = ["git", "show", "--no-patch", "--pretty=format:%B", commit]
    raw_message = run_cmd(show_cmd)
    return raw_message.decode("utf-8", "ignore")
def list_affected_files(commit_range):
    """
    Return the list of file paths changed in the given git commit range.

    The paths come from `git diff --name-only`; surrounding whitespace is
    stripped and blank lines are dropped.
    """
    perr("Getting affected files from", repr(commit_range))
    diff_output = run_cmd(["git", "diff", "--name-only", commit_range])
    stripped_lines = (line.strip() for line in diff_output.decode().splitlines())
    return [line for line in stripped_lines if line]
def get_travis_head_commit():
    """
    Return the value of the TRAVIS_COMMIT environment variable.

    Raises KeyError if the variable is not set.
    """
    head_commit = os.environ["TRAVIS_COMMIT"]
    return head_commit
def get_travis_commit_range():
    """
    Return the git commit range to inspect for the current Travis build.

    For non-PR builds this is TRAVIS_COMMIT_RANGE with "..." replaced by
    "..".  For PR builds it is "<merge-base>..HEAD", where the merge base is
    computed against the freshly fetched TRAVIS_BRANCH.
    """
    if os.environ["TRAVIS_EVENT_TYPE"] != "pull_request":
        # See
        # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122
        travis_range = os.environ["TRAVIS_COMMIT_RANGE"]
        return travis_range.replace("...", "..")

    # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain
    # unrelated changes.  Instead, use the same strategy as on AppVeyor:
    # fetch the base branch, then diff from the merge base up to HEAD.
    base_refspec = "+refs/heads/{0}".format(os.environ["TRAVIS_BRANCH"])
    run_cmd(["git", "fetch", "-q", "origin", base_refspec])
    merge_base_output = run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"])
    base_commit = merge_base_output.decode().strip()
    return "{0}..HEAD".format(base_commit)
def get_travis_commit_description():
    """
    Return the commit message Travis recorded in TRAVIS_COMMIT_MESSAGE.

    Raises KeyError if the variable is not set.
    """
    # Prefer this to get_commit_description(get_travis_head_commit()),
    # as rebasing or other repository events may make TRAVIS_COMMIT invalid
    # at the time we inspect it
    commit_message = os.environ["TRAVIS_COMMIT_MESSAGE"]
    return commit_message
def list_travis_affected_files():
    """
    Return a list of files affected in the current Travis build.

    If the computed commit range cannot be diffed on a non-PR build, fall
    back to the changes of the head commit only.  On PR builds the error is
    re-raised.
    """
    primary_range = get_travis_commit_range()
    try:
        return list_affected_files(primary_range)
    except RuntimeError:
        # TRAVIS_COMMIT_RANGE can contain invalid revisions when
        # building a branch (not a PR) after rebasing:
        # https://github.com/travis-ci/travis-ci/issues/2668
        is_pull_request = os.environ["TRAVIS_EVENT_TYPE"] == "pull_request"
        if is_pull_request:
            raise
        # If it's a rebase, it's probably enough to use the last commit only
        head_only_range = "{0}^..".format(get_travis_head_commit())
        return list_affected_files(head_only_range)
def list_appveyor_affected_files():
    """
    Return a list of files affected in the current AppVeyor build.

    This only works for PR builds.
    """
    # Re-fetch the PR base branch (e.g. origin/master) so that FETCH_HEAD
    # points to it
    base_refspec = "+refs/heads/{0}".format(os.environ["APPVEYOR_REPO_BRANCH"])
    run_cmd(["git", "fetch", "-q", "origin", base_refspec])

    # The merge base of FETCH_HEAD (PR base) and HEAD (PR head) is where the
    # PR branched off
    merge_base_output = run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"])
    base_commit = merge_base_output.decode().strip()

    # Everything changed between that base changeset and HEAD
    pr_range = "{0}..HEAD".format(base_commit)
    return list_affected_files(pr_range)
def list_github_actions_affected_files():
    """
    Return a list of files affected in the current GitHub Actions build.
    """
    # GitHub Actions checks out `refs/remotes/pull/$PR/merge`, where `HEAD`
    # points to the merge commit while `HEAD^` points to the commit before
    # it.  Hence `HEAD^..` covers the changes between master and the PR.
    pr_range = "HEAD^.."
    return list_affected_files(pr_range)
# Topics that map to a language implementation (plus the docs).  The order
# is significant: it determines the order of the generated variables.
LANGUAGE_TOPICS = [
    "c_glib", "cpp", "docs", "go", "java", "js",
    "python", "r", "ruby", "rust", "csharp",
]

# Every topic for which an ARROW_CI_<TOPIC>_AFFECTED variable is emitted.
ALL_TOPICS = LANGUAGE_TOPICS + ["integration", "dev"]

# Maps a top-level path entry to the topics that must be considered affected
# whenever something below that entry changes.
AFFECTED_DEPENDENCIES = {
    # Source trees
    "c_glib": ["ruby"],
    "cpp": ["python", "c_glib", "r", "ruby", "integration"],
    "format": LANGUAGE_TOPICS,
    "go": ["integration"],
    "java": ["integration", "python"],
    "js": ["integration"],
    # CI scripts and configuration
    "ci": ALL_TOPICS,
    ".travis.yml": ALL_TOPICS,
    "appveyor.yml": ALL_TOPICS,
    # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in
    # practice it's going to be CI
    ".github": ALL_TOPICS,
}

# Top-level directories that are themselves topics.
COMPONENTS = {
    "c_glib",
    "cpp",
    "csharp",
    "dev",
    "docs",
    "go",
    "integration",
    "java",
    "js",
    "python",
    "r",
    "ruby",
    "rust",
}
def get_affected_topics(affected_files):
    """
    Return a dict of topics affected by the given files.

    Each dict value is True if affected, False otherwise.

    The first component of each path selects what changed: it is marked as
    affected when it is one of COMPONENTS, and every topic reachable from it
    through AFFECTED_DEPENDENCIES is marked as affected too.  Files whose
    base name starts with "README" are ignored.
    """
    affected = dict.fromkeys(ALL_TOPICS, False)

    def _affect_dependencies(component, visited):
        if component in visited:
            # Already handled (possibly through a circular dependency)
            return
        # Mark the component *before* recursing, otherwise a dependency
        # cycle would recurse forever.
        visited.add(component)
        for topic in AFFECTED_DEPENDENCIES.get(component, ()):
            affected[topic] = True
            _affect_dependencies(topic, visited)

    for path in affected_files:
        # Split the path into its components, outermost first
        parts = []
        head = path
        while head:
            parent, tail = os.path.split(head)
            if parent == head:
                # os.path.split() cannot shrink a root such as "/" any
                # further; keep it as the first component and stop instead
                # of looping forever.
                parts.append(head)
                break
            parts.append(tail)
            head = parent
        parts.reverse()
        assert parts
        p = parts[0]
        fn = parts[-1]
        if fn.startswith("README"):
            continue

        if p in COMPONENTS:
            affected[p] = True

        _affect_dependencies(p, set())

    return affected
def make_env_for_topics(affected):
    """
    Convert a topic -> bool mapping into environment variable settings.

    Each topic yields an "ARROW_CI_<TOPIC>_AFFECTED" key (topic upper-cased)
    whose value is "1" when the topic is affected and "0" otherwise.
    """
    env = {}
    for topic, is_affected in affected.items():
        var_name = "ARROW_CI_{0}_AFFECTED".format(topic.upper())
        if is_affected:
            env[var_name] = "1"
        else:
            env[var_name] = "0"
    return env
def get_unix_shell_eval(env):
    """
    Return a shell-evalable string that exports the given variables.

    The result is a sequence of `export NAME='VALUE'` statements joined
    with "; ".
    """
    export_statements = [
        "export {0}='{1}'".format(name, value) for name, value in env.items()
    ]
    return "; ".join(export_statements)
def get_windows_shell_eval(env):
    """
    Return a Windows shell-evalable string that sets the given variables.

    The result is one `set "NAME=VALUE"` statement per line.
    """
    set_statements = [
        'set "{0}={1}"'.format(name, value) for name, value in env.items()
    ]
    return "\n".join(set_statements)
def run_from_travis():
    """
    Compute the affected topics for a Travis-CI build.

    Returns a Unix shell snippet exporting one ARROW_CI_*_AFFECTED variable
    per topic.  Diagnostics are printed to stderr.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars("TRAVIS_", "(BRANCH|COMMIT|PULL)")

    is_official_master_build = (
        os.environ["TRAVIS_REPO_SLUG"] == "apache/arrow"
        and os.environ["TRAVIS_BRANCH"] == "master"
        and os.environ["TRAVIS_EVENT_TYPE"] != "pull_request"
    )
    if is_official_master_build:
        # Never skip anything on master builds in the official repository
        affected = dict.fromkeys(ALL_TOPICS, True)
    else:
        message = get_travis_commit_description()
        force_all = "[force ci]" in message or "[force travis]" in message
        if "[skip travis]" in message:
            # Skip everything
            affected = dict.fromkeys(ALL_TOPICS, False)
        elif force_all:
            # Test everything
            affected = dict.fromkeys(ALL_TOPICS, True)
        else:
            # Test only what the changed files affect
            changed_files = list_travis_affected_files()
            perr("Affected files:", changed_files)
            affected = get_affected_topics(changed_files)
            assert set(affected) <= set(ALL_TOPICS), affected

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_unix_shell_eval(make_env_for_topics(affected))
def run_from_appveyor():
    """
    Compute the affected topics for an AppVeyor build.

    Returns a Windows shell snippet setting one ARROW_CI_*_AFFECTED variable
    per topic.  Diagnostics are printed to stderr.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars("APPVEYOR_", "(PULL|REPO)")

    if os.environ.get("APPVEYOR_PULL_REQUEST_HEAD_COMMIT"):
        # PR build: only test what the changed files affect
        changed_files = list_appveyor_affected_files()
        perr("Affected files:", changed_files)
        affected = get_affected_topics(changed_files)
        assert set(affected) <= set(ALL_TOPICS), affected
    else:
        # Not a PR build, test everything
        affected = dict.fromkeys(ALL_TOPICS, True)

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_windows_shell_eval(make_env_for_topics(affected))
def run_from_github():
    """
    Compute the affected topics for a GitHub Actions build.

    Returns a Unix shell snippet exporting one ARROW_CI_*_AFFECTED variable
    per topic.  Diagnostics are printed to stderr.
    """
    perr("Environment variables (excerpt):")
    dump_env_vars(
        "GITHUB_", "(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)"
    )

    if os.environ["GITHUB_EVENT_NAME"] == "pull_request":
        # PR build: only test what the changed files affect
        changed_files = list_github_actions_affected_files()
        perr("Affected files:", changed_files)
        affected = get_affected_topics(changed_files)
        assert set(affected) <= set(ALL_TOPICS), affected
    else:
        # Not a PR build, test everything
        affected = dict.fromkeys(ALL_TOPICS, True)

    perr("Affected topics:")
    perr(pprint.pformat(affected))
    return get_unix_shell_eval(make_env_for_topics(affected))
def test_get_affected_topics():
    """
    Check get_affected_topics() against a few representative changed files.
    """
    cases = [
        # A C++ change affects cpp and everything listed as depending on it
        (
            ["cpp/CMakeLists.txt"],
            {
                "c_glib": True,
                "cpp": True,
                "docs": False,
                "go": False,
                "java": False,
                "js": False,
                "python": True,
                "r": True,
                "ruby": True,
                "rust": False,
                "csharp": False,
                "integration": True,
                "dev": False,
            },
        ),
        # A format change affects every language topic and, through their
        # dependencies, integration -- but not dev
        (
            ["format/Schema.fbs"],
            {
                "c_glib": True,
                "cpp": True,
                "docs": True,
                "go": True,
                "java": True,
                "js": True,
                "python": True,
                "r": True,
                "ruby": True,
                "rust": True,
                "csharp": True,
                "integration": True,
                "dev": False,
            },
        ),
        # A change under .github affects all topics
        (
            [".github/workflows"],
            {
                "c_glib": True,
                "cpp": True,
                "docs": True,
                "go": True,
                "java": True,
                "js": True,
                "python": True,
                "r": True,
                "ruby": True,
                "rust": True,
                "csharp": True,
                "integration": True,
                "dev": True,
            },
        ),
    ]
    for changed_files, expected_topics in cases:
        affected_topics = get_affected_topics(changed_files)
        assert affected_topics == expected_topics
if __name__ == "__main__":
    # This script should have its output evaluated by a shell,
    # e.g. "eval `python ci/detect-changes.py`"

    # (environment variable identifying the CI service, handler), in
    # priority order; the first variable that is set and non-empty wins.
    ci_handlers = (
        ("TRAVIS", run_from_travis),
        ("APPVEYOR", run_from_appveyor),
        ("GITHUB_WORKFLOW", run_from_github),
    )
    for marker_var, handler in ci_handlers:
        if not os.environ.get(marker_var):
            continue
        try:
            print(handler())
        except Exception:
            # Make sure the enclosing eval will return an error
            print("exit 1")
            raise
        break
    else:
        sys.exit(
            "Script must be run under Travis-CI, AppVeyor or GitHub Actions"
        )