You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/05/20 12:42:39 UTC

[arrow] branch master updated: ARROW-8872: [CI] Restore ci/detect-changes.py

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new fd9bf16  ARROW-8872: [CI] Restore ci/detect-changes.py
fd9bf16 is described below

commit fd9bf1630822188bf8a4b784d12c12f05e04e4c0
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed May 20 14:42:11 2020 +0200

    ARROW-8872: [CI] Restore ci/detect-changes.py
    
    This is needed for Travis-CI builds to function properly.
    The file was mistakenly removed in db1c49a77f78152d442299082f6a663e59f91174, part of PR #7080 (fixing ARROW-8662).
    
    Closes #7234 from pitrou/ARROW-8872-ci-detect-changes
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 ci/detect-changes.py | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 365 insertions(+)

diff --git a/ci/detect-changes.py b/ci/detect-changes.py
new file mode 100644
index 0000000..c32f6e0
--- /dev/null
+++ b/ci/detect-changes.py
@@ -0,0 +1,365 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+
+import functools
+import os
+import pprint
+import re
+import sys
+import subprocess
+
+
+# Print to standard error: same call signature as print(), with ``file``
+# pre-bound to sys.stderr.  Diagnostics go through this helper because the
+# standard output of this script is meant to be evaluated by a shell.
+perr = functools.partial(print, file=sys.stderr)
+
+
+def dump_env_vars(prefix, pattern=None):
+    """
+    Print selected environment variables to stderr, sorted by name.
+
+    A variable is printed if its name starts with *prefix* and, when
+    *pattern* is given, the regular expression *pattern* is found
+    somewhere in its name.
+    """
+    for var_name in sorted(os.environ):
+        if not var_name.startswith(prefix):
+            continue
+        if pattern is not None and not re.search(pattern, var_name):
+            continue
+        perr("- {0}: {1!r}".format(var_name, os.environ[var_name]))
+
+
+def run_cmd(cmdline):
+    """
+    Run the given command and return its standard output as bytes.
+
+    *cmdline* is passed unchanged to subprocess.Popen; both stdout and
+    stderr of the child process are captured.
+
+    Raise RuntimeError if the command exits with a non-zero code.  The
+    error message contains the command line, the exit code and the
+    captured stderr.
+    """
+    proc = subprocess.Popen(cmdline,
+                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, err = proc.communicate()
+    if proc.returncode != 0:
+        # Decode stderr leniently: a strict decode would raise
+        # UnicodeDecodeError on non-UTF-8 output, hiding the actual failure
+        # from callers that handle RuntimeError.
+        raise RuntimeError("Command {cmdline} failed with code {returncode}, "
+                           "stderr was:\n{stderr}\n"
+                           .format(cmdline=cmdline, returncode=proc.returncode,
+                                   stderr=err.decode('utf-8', 'replace')))
+    return out
+
+
+def get_commit_description(commit):
+    """
+    Return the message (title + body) of the given git commit as text.
+    """
+    show_cmd = ["git", "show", "--no-patch", "--pretty=format:%B", commit]
+    raw_message = run_cmd(show_cmd)
+    # Bytes that are not valid UTF-8 are dropped rather than raising
+    return raw_message.decode('utf-8', 'ignore')
+
+
+def list_affected_files(commit_range):
+    """
+    Return the list of file paths changed in the given git commit range,
+    as reported by `git diff --name-only`.
+    """
+    perr("Getting affected files from", repr(commit_range))
+    diff_output = run_cmd(["git", "diff", "--name-only", commit_range])
+    stripped = [line.strip() for line in diff_output.decode().splitlines()]
+    # Drop lines that are empty once stripped
+    return [name for name in stripped if name]
+
+
+def get_travis_head_commit():
+    """
+    Return the value of the TRAVIS_COMMIT environment variable.
+
+    Raise KeyError if the variable is not set.
+    """
+    head_commit = os.environ['TRAVIS_COMMIT']
+    return head_commit
+
+
+def get_travis_commit_range():
+    """
+    Return the git commit range to inspect for the current Travis-CI build.
+
+    For pull requests, the range goes from the merge base with the target
+    branch up to HEAD.  For other builds, it is derived from
+    TRAVIS_COMMIT_RANGE.
+    """
+    if os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request':
+        # Turn the three-dot range into a two-dot range, see
+        # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122
+        travis_range = os.environ['TRAVIS_COMMIT_RANGE']
+        return travis_range.replace('...', '..')
+
+    # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain
+    # unrelated changes.  Instead, use the same merge-base strategy as
+    # list_appveyor_affected_files(): fetch the target branch (which sets
+    # FETCH_HEAD), then diff from its merge base with HEAD.
+    target_ref = "+refs/heads/{0}".format(os.environ['TRAVIS_BRANCH'])
+    run_cmd(["git", "fetch", "-q", "origin", target_ref])
+    merge_base_out = run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"])
+    return "{0}..HEAD".format(merge_base_out.decode().strip())
+
+
+def get_travis_commit_description():
+    """
+    Return the commit message of the current Travis-CI build, as given by
+    the TRAVIS_COMMIT_MESSAGE environment variable.
+    """
+    # This is preferred to get_commit_description(get_travis_head_commit()):
+    # rebasing or other repository events may make TRAVIS_COMMIT invalid
+    # by the time we inspect it.
+    commit_message = os.environ['TRAVIS_COMMIT_MESSAGE']
+    return commit_message
+
+
+def list_travis_affected_files():
+    """
+    Return the list of files changed in the current Travis-CI build.
+
+    If the computed commit range cannot be diffed on a non-PR build, fall
+    back to the files changed by the head commit alone.
+    """
+    commit_range = get_travis_commit_range()
+    try:
+        affected_files = list_affected_files(commit_range)
+    except RuntimeError:
+        # TRAVIS_COMMIT_RANGE can contain invalid revisions when
+        # building a branch (not a PR) after rebasing:
+        # https://github.com/travis-ci/travis-ci/issues/2668
+        is_pull_request = os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request'
+        if is_pull_request:
+            raise
+        # If it's a rebase, it's probably enough to use the last commit only
+        last_commit_only = '{0}^..'.format(get_travis_head_commit())
+        return list_affected_files(last_commit_only)
+    else:
+        return affected_files
+
+
+def list_appveyor_affected_files():
+    """
+    Return the list of files changed in the current AppVeyor build.
+    This only works for PR builds.
+    """
+    base_ref = "+refs/heads/{0}".format(os.environ['APPVEYOR_REPO_BRANCH'])
+    # Re-fetch the PR base branch (e.g. origin/master), which makes
+    # FETCH_HEAD point to it
+    run_cmd(["git", "fetch", "-q", "origin", base_ref])
+    # Find the common ancestor of HEAD (PR head) and FETCH_HEAD (PR base)
+    merge_base_out = run_cmd(["git", "merge-base", "HEAD", "FETCH_HEAD"])
+    merge_base = merge_base_out.decode().strip()
+    # List the files changed between that ancestor and HEAD
+    pr_range = "{0}..HEAD".format(merge_base)
+    return list_affected_files(pr_range)
+
+
+def list_github_actions_affected_files():
+    """
+    Return the list of files changed in the current GitHub Actions build.
+    """
+    # GitHub Actions checks out `refs/remotes/pull/$PR/merge`, where `HEAD`
+    # is the merge commit and `HEAD^` is the commit before it.  The range
+    # `HEAD^..` (i.e. `HEAD^..HEAD`) therefore covers the changes that the
+    # PR brings on top of the base branch.
+    pr_merge_range = "HEAD^.."
+    return list_affected_files(pr_merge_range)
+
+
+# Topics marked as affected by a change under 'format' (see the 'format'
+# entry of AFFECTED_DEPENDENCIES below).
+LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python',
+                   'r', 'ruby', 'rust', 'csharp']
+
+# Every topic reported by this script; get_affected_topics() returns one
+# boolean per entry.
+ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev']
+
+
+# Maps the first component of a changed path to the topics that must be
+# marked as affected in addition to the component itself.
+# get_affected_topics() follows these entries transitively.
+AFFECTED_DEPENDENCIES = {
+    'java': ['integration', 'python'],
+    'js': ['integration'],
+    'ci': ALL_TOPICS,
+    'cpp': ['python', 'c_glib', 'r', 'ruby', 'integration'],
+    'format': LANGUAGE_TOPICS,
+    'go': ['integration'],
+    '.travis.yml': ALL_TOPICS,
+    'appveyor.yml': ALL_TOPICS,
+    # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in
+    # practice it's going to be CI
+    '.github': ALL_TOPICS,
+    'c_glib': ['ruby']
+}
+
+# First path components that are themselves topics: a change under one of
+# them marks the topic of the same name as affected.
+COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js',
+              'rust', 'csharp', 'go', 'docs', 'python', 'dev'}
+
+
+def get_affected_topics(affected_files):
+    """
+    Return a dict of topics affected by the given files.
+    Each dict value is True if affected, False otherwise.
+
+    Only the first component of each path decides which topics are
+    affected: the component itself if it is listed in COMPONENTS, plus
+    everything reachable from it through AFFECTED_DEPENDENCIES.  Files
+    whose name starts with 'README' are ignored, and so are paths with
+    no component at all (e.g. an empty string).
+    """
+    affected = dict.fromkeys(ALL_TOPICS, False)
+
+    # Components whose dependencies have already been propagated.  A
+    # component is recorded *before* its dependencies are visited, so the
+    # traversal terminates even if AFFECTED_DEPENDENCIES contains a cycle.
+    visited = set()
+
+    def _affect_dependencies(component):
+        if component in visited:
+            return
+        visited.add(component)
+        for topic in AFFECTED_DEPENDENCIES.get(component, ()):
+            affected[topic] = True
+            _affect_dependencies(topic)
+
+    for path in affected_files:
+        parts = []
+        head = path
+        while head:
+            parent, tail = os.path.split(head)
+            if parent == head:
+                # os.path.split() returns a root (e.g. '/') unchanged;
+                # stop here instead of looping forever on absolute paths.
+                break
+            parts.append(tail)
+            head = parent
+        parts.reverse()
+        if not parts:
+            # Nothing to classify for this path
+            continue
+        p = parts[0]
+        fn = parts[-1]
+        if fn.startswith('README'):
+            continue
+
+        if p in COMPONENTS:
+            affected[p] = True
+
+        _affect_dependencies(p)
+
+    return affected
+
+
+def make_env_for_topics(affected):
+    """
+    Convert a topic -> bool mapping into environment variable definitions.
+
+    Each topic gives a variable named ARROW_CI_<TOPIC>_AFFECTED (topic name
+    upper-cased) whose value is '1' if the topic is affected, '0' otherwise.
+    """
+    env = {}
+    for topic, is_affected in affected.items():
+        var_name = 'ARROW_CI_{0}_AFFECTED'.format(topic.upper())
+        env[var_name] = '1' if is_affected else '0'
+    return env
+
+
+def get_unix_shell_eval(env):
+    """
+    Return a string of `export NAME='VALUE'` statements, separated by "; ",
+    that a Unix shell can evaluate to set the given environment variables.
+    """
+    export_stmts = []
+    for name, value in env.items():
+        export_stmts.append("export {0}='{1}'".format(name, value))
+    return "; ".join(export_stmts)
+
+
+def get_windows_shell_eval(env):
+    """
+    Return a string of `set "NAME=VALUE"` commands, one per line, that a
+    Windows shell can evaluate to set the given environment variables.
+    """
+    set_cmds = []
+    for name, value in env.items():
+        set_cmds.append('set "{0}={1}"'.format(name, value))
+    return "\n".join(set_cmds)
+
+
+def run_from_travis():
+    """
+    Compute the affected topics for a Travis-CI build and return them as
+    a Unix shell snippet exporting the ARROW_CI_*_AFFECTED variables.
+    """
+    perr("Environment variables (excerpt):")
+    dump_env_vars('TRAVIS_', '(BRANCH|COMMIT|PULL)')
+    is_official_master_build = (
+        os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and
+        os.environ['TRAVIS_BRANCH'] == 'master' and
+        os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request')
+    if is_official_master_build:
+        # Never skip anything on master builds in the official repository
+        affected = dict.fromkeys(ALL_TOPICS, True)
+    else:
+        commit_msg = get_travis_commit_description()
+        force_markers = ('[force ci]', '[force travis]')
+        if '[skip travis]' in commit_msg:
+            # The commit message asks to skip everything
+            affected = dict.fromkeys(ALL_TOPICS, False)
+        elif any(marker in commit_msg for marker in force_markers):
+            # The commit message asks to test everything
+            affected = dict.fromkeys(ALL_TOPICS, True)
+        else:
+            # Only test the topics affected by the changed files
+            affected_files = list_travis_affected_files()
+            perr("Affected files:", affected_files)
+            affected = get_affected_topics(affected_files)
+            assert set(affected) <= set(ALL_TOPICS), affected
+
+    perr("Affected topics:")
+    perr(pprint.pformat(affected))
+    topics_env = make_env_for_topics(affected)
+    return get_unix_shell_eval(topics_env)
+
+
+def run_from_appveyor():
+    """
+    Compute the affected topics for an AppVeyor build and return them as
+    a Windows shell snippet setting the ARROW_CI_*_AFFECTED variables.
+    """
+    perr("Environment variables (excerpt):")
+    dump_env_vars('APPVEYOR_', '(PULL|REPO)')
+    if os.environ.get('APPVEYOR_PULL_REQUEST_HEAD_COMMIT'):
+        # PR build: only test the topics affected by the changed files
+        affected_files = list_appveyor_affected_files()
+        perr("Affected files:", affected_files)
+        affected = get_affected_topics(affected_files)
+        assert set(affected) <= set(ALL_TOPICS), affected
+    else:
+        # Not a PR build, test everything
+        affected = dict.fromkeys(ALL_TOPICS, True)
+
+    perr("Affected topics:")
+    perr(pprint.pformat(affected))
+    topics_env = make_env_for_topics(affected)
+    return get_windows_shell_eval(topics_env)
+
+
+def run_from_github():
+    """
+    Compute the affected topics for a GitHub Actions build and return them
+    as a Unix shell snippet exporting the ARROW_CI_*_AFFECTED variables.
+    """
+    perr("Environment variables (excerpt):")
+    dump_env_vars('GITHUB_',
+                  '(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)')
+    if os.environ['GITHUB_EVENT_NAME'] == 'pull_request':
+        # PR build: only test the topics affected by the changed files
+        affected_files = list_github_actions_affected_files()
+        perr("Affected files:", affected_files)
+        affected = get_affected_topics(affected_files)
+        assert set(affected) <= set(ALL_TOPICS), affected
+    else:
+        # Not a PR build, test everything
+        affected = dict.fromkeys(ALL_TOPICS, True)
+
+    perr("Affected topics:")
+    perr(pprint.pformat(affected))
+    topics_env = make_env_for_topics(affected)
+    return get_unix_shell_eval(topics_env)
+
+
+def test_get_affected_topics():
+    """
+    Check the result of get_affected_topics() on a few representative paths.
+    """
+    every_topic = ('c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python',
+                   'r', 'ruby', 'rust', 'csharp', 'integration', 'dev')
+
+    def expected(unaffected=()):
+        # Full topic -> bool mapping where only *unaffected* topics are False
+        return {topic: topic not in unaffected for topic in every_topic}
+
+    # A change under cpp/ also affects what depends on cpp
+    assert get_affected_topics(['cpp/CMakeLists.txt']) == expected(
+        unaffected=('docs', 'go', 'java', 'js', 'rust', 'csharp', 'dev'))
+
+    # A change under format/ affects everything except 'dev'
+    assert get_affected_topics(['format/Schema.fbs']) == expected(
+        unaffected=('dev',))
+
+    # A change under .github/ affects every topic
+    assert get_affected_topics(['.github/workflows']) == expected()
+
+
+if __name__ == "__main__":
+    # This script should have its output evaluated by a shell,
+    # e.g. "eval `python ci/detect-changes.py`"
+
+    # (environment variable identifying the CI service, entry point),
+    # checked in this order
+    runners = (
+        ('TRAVIS', run_from_travis),
+        ('APPVEYOR', run_from_appveyor),
+        ('GITHUB_WORKFLOW', run_from_github),
+    )
+    for env_name, runner in runners:
+        if not os.environ.get(env_name):
+            continue
+        try:
+            print(runner())
+        except Exception:
+            # Make sure the enclosing eval will return an error
+            print("exit 1")
+            raise
+        break
+    else:
+        sys.exit("Script must be run under Travis-CI, AppVeyor or "
+                 "GitHub Actions")