You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2021/10/09 22:04:46 UTC

[incubator-mxnet] branch master updated: [Feature][Master] Clang-format tool to perform additional formatting and semantic checking of code. (#20433)

This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 4b73646  [Feature][Master] Clang-format tool to perform additional formatting and semantic checking of code.  (#20433)
4b73646 is described below

commit 4b73646586c128a11345b4f3e1811cc0f3f1bf7d
Author: mozga <ma...@intel.com>
AuthorDate: Sun Oct 10 00:02:42 2021 +0200

    [Feature][Master] Clang-format tool to perform additional formatting and semantic checking of code.  (#20433)
    
    * Clang-format hook
    
    * Added: workflow file
    
    * tools/lint/clang_format_ci.sh was added
    
    * Permision was set on +x
    
    * Jenkins clang-format runner
    
    * Update runtime_funciton.sh file
    
    * Master last commit sha
    
    * Set BASE_SHA in greetings
    
    * GITHUB_BASE_REF and GITHUB_RUN_ID: set varaibles
    
    * Runtime function, os_x_static_build stores configuration
    
    * Greetings contains env variables
    
    * Check env params
    
    * White space between breackets
    
    * Show all refs
    
    * Correct refs to master
    
    * End up if []
    
    * Git clang format rat-excludes
    
    * LICENCE file was modifed to exclude clang-format-13 file
    
    * Greetings, update env
    
    * Remove unnecessary condition
    
    * Update LICENSE
    
    * Fix licence checker
    
    * Clang-format file update
    
    * Error message is shifted
    
    Co-authored-by: Sheng Zha <sz...@users.noreply.github.com>
---
 .clang-format                   |  40 +++
 .github/workflows/greetings.yml |   4 +
 LICENSE                         |   7 +
 ci/docker/runtime_functions.sh  |  38 +++
 rat-excludes                    |   3 +
 tools/git-pre-commit            |  22 ++
 tools/license_header.py         |   3 +
 tools/lint/clang_format_ci.sh   |  20 ++
 tools/lint/git-clang-format-13  | 586 ++++++++++++++++++++++++++++++++++++++++
 9 files changed, 723 insertions(+)

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..47a12e6
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+---
+Language: Cpp
+BasedOnStyle: Google
+ColumnLimit: 100
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: false
+AlignConsecutiveMacros: true
+DerivePointerAlignment: false
+SortIncludes: true
+MaxEmptyLinesToKeep: 1
+PointerAlignment: Left
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: false
+BinPackParameters: false
+SortIncludes: false
diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
index a9db175..cdf5acc 100644
--- a/.github/workflows/greetings.yml
+++ b/.github/workflows/greetings.yml
@@ -7,6 +7,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/first-interaction@v1
+      env:
+        GITHUB_PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+        GITHUB_PR_RUN_ID: ${{ github.run_id }}
+        GITHUB_PR_BASE_REF: ${{ github.event.pull_request.base.ref }} 
       with:
         repo-token: ${{ secrets.GITHUB_TOKEN }}
         issue-message: |
diff --git a/LICENSE b/LICENSE
index 795130c..80abbe1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -370,3 +370,10 @@
     =======================================================================================
 
     docs/python_docs/themes/mx-theme/mxtheme/static/webfonts/ (Copy of the License available at licenses/OFL1_1)
+    
+    =======================================================================================
+    Apache-2.0 WITH LLVM-exception
+    =======================================================================================
+
+    tools/lint/git-clang-format-13
+    
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 683954b..be0f1ca 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -701,6 +701,7 @@ build_ubuntu_gpu_large_tensor() {
 
 sanity_check() {
     set -ex
+    sanity_clang
     sanity_license
     sanity_python
     sanity_cpp
@@ -716,6 +717,43 @@ sanity_cpp() {
     3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin cpp-package tests --exclude_path src/operator/contrib/ctc_include include/onednn
 }
 
+sanity_clang() {
+    set -ex
+    # .github/workgflows/greetings.yml passes BASE_SHA, GITHUB_RUN_ID, GITHUB_BASE_REF for pull requests.
+    BASE_SHA="${GITHUB_PR_BASE_SHA}"
+    GITHUB_RUN_ID="${GITHUB_PR_RUN_ID}"
+    GITHUB_BASE_REF="${GITHUB_PR_BASE_REF}"
+
+    if [ "${BASE_SHA}" == "" ]; then
+        BASE_SHA=`git show-ref --hash refs/remotes/origin/master`
+        if [ "${GITHUB_RUN_ID}" == "" ] || [ "${GITHUB_BASE_REF}" == "" ]; then
+             GITHUB_RUN_ID=`(git log --pretty=format:'%h' -n 1)`
+             GITHUB_BASE_REF="master"
+        fi
+    fi
+
+    git remote add "${GITHUB_RUN_ID}" https://github.com/apache/incubator-mxnet.git
+    git fetch "${GITHUB_RUN_ID}" "$GITHUB_BASE_REF"
+    
+    tools/lint/clang_format_ci.sh "${BASE_SHA}"
+    GIT_DIFFERENCE=$(git diff)
+    if [[ -z $GIT_DIFFERENCE ]]; then
+        git remote remove "${GITHUB_RUN_ID}" # temporary remote is removed
+        exit 0
+    fi
+    
+    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    echo "| clang-format failures found! Run: "
+    echo "|    tool/lint/clang_format_ci.sh ${BASE_SHA} "
+    echo "| to fix this error. "
+    echo "| For more info, see: "
+    echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    
+    echo "$GIT_DIFFERENCE"
+    git remote remove "${GITHUB_RUN_ID}" # temporary remote is removed
+    exit 1
+}
+
 sanity_python() {
     set -ex
     export DMLC_LOG_STACK_TRACE_DEPTH=100
diff --git a/rat-excludes b/rat-excludes
index 83e2e0c..2392ff0 100644
--- a/rat-excludes
+++ b/rat-excludes
@@ -72,6 +72,9 @@ DartConfiguration.tcl
 .*\.egg-info
 .*\.t
 
+# SPDX-License-Identifier: git-clang-format-13
+git-clang-format-13
+
 # Files generated by Cython
 core.cpp
 symbol.cpp
diff --git a/tools/git-pre-commit b/tools/git-pre-commit
new file mode 100644
index 0000000..2d69788
--- /dev/null
+++ b/tools/git-pre-commit
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#!/bin/bash
+set -e
+
+echo "Running pre-commit clang-format"
+tools/lint/git-clang-format HEAD~ --force
diff --git a/tools/license_header.py b/tools/license_header.py
index 4475171..743b29c 100755
--- a/tools/license_header.py
+++ b/tools/license_header.py
@@ -77,6 +77,9 @@ _WHITE_LIST = [
                '3rdparty/ps-lite',
                '3rdparty/tvm',
 
+               # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+               'tools/lint/git-clang-format-13',
+               
                # 3rdparty headerfiles under different licenses
                'include/onednn',
 
diff --git a/tools/lint/clang_format_ci.sh b/tools/lint/clang_format_ci.sh
new file mode 100755
index 0000000..41823b4
--- /dev/null
+++ b/tools/lint/clang_format_ci.sh
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#!/bin/sh
+set -eux 
+    tools/lint/git-clang-format-13 --verbose "$1" --
diff --git a/tools/lint/git-clang-format-13 b/tools/lint/git-clang-format-13
new file mode 100755
index 0000000..cfcfb9f
--- /dev/null
+++ b/tools/lint/git-clang-format-13
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+#
+#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+
+r"""
+clang-format git integration
+============================
+
+This file provides a clang-format integration for git. Put it somewhere in your
+path and ensure that it is executable. Then, "git clang-format" will invoke
+clang-format on the changes in current files or a specific commit.
+
+For further details, run:
+git clang-format -h
+
+Requires Python 2.7 or Python 3
+"""
+
+from __future__ import absolute_import, division, print_function
+import argparse
+import collections
+import contextlib
+import errno
+import os
+import re
+import subprocess
+import sys
+
+usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
+
+desc = '''
+If zero or one commits are given, run clang-format on all lines that differ
+between the working directory and <commit>, which defaults to HEAD.  Changes are
+only applied to the working directory.
+
+If two commits are given (requires --diff), run clang-format on all lines in the
+second <commit> that differ from the first <commit>.
+
+The following git-config settings set the default of the corresponding option:
+  clangFormat.binary
+  clangFormat.commit
+  clangFormat.extensions
+  clangFormat.style
+'''
+
+# Name of the temporary index file in which save the output of clang-format.
+# This file is created within the .git directory.
+temp_index_basename = 'clang-format-index'
+
+
+Range = collections.namedtuple('Range', 'start, count')
+
+
+def main():
+  config = load_git_config()
+
+  # In order to keep '--' yet allow options after positionals, we need to
+  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
+  # nargs=argparse.REMAINDER disallows options after positionals.)
+  argv = sys.argv[1:]
+  try:
+    idx = argv.index('--')
+  except ValueError:
+    dash_dash = []
+  else:
+    dash_dash = argv[idx:]
+    argv = argv[:idx]
+
+  default_extensions = ','.join([
+      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
+      'c', 'h',  # C
+      'm',  # ObjC
+      'mm',  # ObjC++
+      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
+      'cu', 'cuh',  # CUDA
+      # Other languages that clang-format supports
+      'proto', 'protodevel',  # Protocol Buffers
+      'java',  # Java
+      'js',  # JavaScript
+      'ts',  # TypeScript
+      'cs',  # C Sharp
+      ])
+
+  p = argparse.ArgumentParser(
+    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=desc)
+  p.add_argument('--binary',
+                 default=config.get('clangformat.binary', 'clang-format'),
+                 help='path to clang-format'),
+  p.add_argument('--commit',
+                 default=config.get('clangformat.commit', 'HEAD'),
+                 help='default commit to use if none is specified'),
+  p.add_argument('--diff', action='store_true',
+                 help='print a diff instead of applying the changes')
+  p.add_argument('--extensions',
+                 default=config.get('clangformat.extensions',
+                                    default_extensions),
+                 help=('comma-separated list of file extensions to format, '
+                       'excluding the period and case-insensitive')),
+  p.add_argument('-f', '--force', action='store_true',
+                 help='allow changes to unstaged files')
+  p.add_argument('-p', '--patch', action='store_true',
+                 help='select hunks interactively')
+  p.add_argument('-q', '--quiet', action='count', default=0,
+                 help='print less information')
+  p.add_argument('--style',
+                 default=config.get('clangformat.style', None),
+                 help='passed to clang-format'),
+  p.add_argument('-v', '--verbose', action='count', default=0,
+                 help='print extra information')
+  # We gather all the remaining positional arguments into 'args' since we need
+  # to use some heuristics to determine whether or not <commit> was present.
+  # However, to print pretty messages, we make use of metavar and help.
+  p.add_argument('args', nargs='*', metavar='<commit>',
+                 help='revision from which to compute the diff')
+  p.add_argument('ignored', nargs='*', metavar='<file>...',
+                 help='if specified, only consider differences in these files')
+  opts = p.parse_args(argv)
+
+  opts.verbose -= opts.quiet
+  del opts.quiet
+
+  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
+  if len(commits) > 1:
+    if not opts.diff:
+      die('--diff is required when two commits are given')
+  else:
+    if len(commits) > 2:
+      die('at most two commits allowed; %d given' % len(commits))
+  changed_lines = compute_diff_and_extract_lines(commits, files)
+  if opts.verbose >= 1:
+    ignored_files = set(changed_lines)
+  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
+  if opts.verbose >= 1:
+    ignored_files.difference_update(changed_lines)
+    if ignored_files:
+      print('Ignoring changes in the following files (wrong extension):')
+      for filename in ignored_files:
+        print('    %s' % filename)
+    if changed_lines:
+      print('Running clang-format on the following files:')
+      for filename in changed_lines:
+        print('    %s' % filename)
+  if not changed_lines:
+    if opts.verbose >= 0:
+      print('no modified files to format')
+    return
+  # The computed diff outputs absolute paths, so we must cd before accessing
+  # those files.
+  cd_to_toplevel()
+  if len(commits) > 1:
+    old_tree = commits[1]
+    new_tree = run_clang_format_and_save_to_tree(changed_lines,
+                                                 revision=commits[1],
+                                                 binary=opts.binary,
+                                                 style=opts.style)
+  else:
+    old_tree = create_tree_from_workdir(changed_lines)
+    new_tree = run_clang_format_and_save_to_tree(changed_lines,
+                                                 binary=opts.binary,
+                                                 style=opts.style)
+  if opts.verbose >= 1:
+    print('old tree: %s' % old_tree)
+    print('new tree: %s' % new_tree)
+  if old_tree == new_tree:
+    if opts.verbose >= 0:
+      print('clang-format did not modify any files')
+  elif opts.diff:
+    print_diff(old_tree, new_tree)
+  else:
+    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
+                                  patch_mode=opts.patch)
+    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
+      print('changed files:')
+      for filename in changed_files:
+        print('    %s' % filename)
+
+
+def load_git_config(non_string_options=None):
+  """Return the git configuration as a dictionary.
+
+  All options are assumed to be strings unless in `non_string_options`, in which
+  is a dictionary mapping option name (in lower case) to either "--bool" or
+  "--int"."""
+  if non_string_options is None:
+    non_string_options = {}
+  out = {}
+  for entry in run('git', 'config', '--list', '--null').split('\0'):
+    if entry:
+      if '\n' in entry:
+        name, value = entry.split('\n', 1)
+      else:
+        # A setting with no '=' ('\n' with --null) is implicitly 'true'
+        name = entry
+        value = 'true'
+      if name in non_string_options:
+        value = run('git', 'config', non_string_options[name], name)
+      out[name] = value
+  return out
+
+
+def interpret_args(args, dash_dash, default_commit):
+  """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
+
+  It is assumed that "--" and everything that follows has been removed from
+  args and placed in `dash_dash`.
+
+  If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
+  left (if present) are taken as commits.  Otherwise, the arguments are checked
+  from left to right if they are commits or files.  If commits are not given,
+  a list with `default_commit` is used."""
+  if dash_dash:
+    if len(args) == 0:
+      commits = [default_commit]
+    else:
+      commits = args
+    for commit in commits:
+      object_type = get_object_type(commit)
+      if object_type not in ('commit', 'tag'):
+        if object_type is None:
+          die("'%s' is not a commit" % commit)
+        else:
+          die("'%s' is a %s, but a commit was expected" % (commit, object_type))
+    files = dash_dash[1:]
+  elif args:
+    commits = []
+    while args:
+      if not disambiguate_revision(args[0]):
+        break
+      commits.append(args.pop(0))
+    if not commits:
+      commits = [default_commit]
+    files = args
+  else:
+    commits = [default_commit]
+    files = []
+  return commits, files
+
+
+def disambiguate_revision(value):
+  """Returns True if `value` is a revision, False if it is a file, or dies."""
+  # If `value` is ambiguous (neither a commit nor a file), the following
+  # command will die with an appropriate error message.
+  run('git', 'rev-parse', value, verbose=False)
+  object_type = get_object_type(value)
+  if object_type is None:
+    return False
+  if object_type in ('commit', 'tag'):
+    return True
+  die('`%s` is a %s, but a commit or filename was expected' %
+      (value, object_type))
+
+
+def get_object_type(value):
+  """Returns a string description of an object's type, or None if it is not
+  a valid git object."""
+  cmd = ['git', 'cat-file', '-t', value]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  stdout, stderr = p.communicate()
+  if p.returncode != 0:
+    return None
+  return convert_string(stdout.strip())
+
+
+def compute_diff_and_extract_lines(commits, files):
+  """Calls compute_diff() followed by extract_lines()."""
+  diff_process = compute_diff(commits, files)
+  changed_lines = extract_lines(diff_process.stdout)
+  diff_process.stdout.close()
+  diff_process.wait()
+  if diff_process.returncode != 0:
+    # Assume error was already printed to stderr.
+    sys.exit(2)
+  return changed_lines
+
+
+def compute_diff(commits, files):
+  """Return a subprocess object producing the diff from `commits`.
+
+  The return value's `stdin` file object will produce a patch with the
+  differences between the working directory and the first commit if a single
+  one was specified, or the difference between both specified commits, filtered
+  on `files` (if non-empty).  Zero context lines are used in the patch."""
+  git_tool = 'diff-index'
+  if len(commits) > 1:
+    git_tool = 'diff-tree'
+  cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
+  cmd.extend(files)
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+  p.stdin.close()
+  return p
+
+
+def extract_lines(patch_file):
+  """Extract the changed lines in `patch_file`.
+
+  The return value is a dictionary mapping filename to a list of (start_line,
+  line_count) pairs.
+
+  The input must have been produced with ``-U0``, meaning unidiff format with
+  zero lines of context.  The return value is a dict mapping filename to a
+  list of line `Range`s."""
+  matches = {}
+  for line in patch_file:
+    line = convert_string(line)
+    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
+    if match:
+      filename = match.group(1).rstrip('\r\n')
+    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
+    if match:
+      start_line = int(match.group(1))
+      line_count = 1
+      if match.group(3):
+        line_count = int(match.group(3))
+      if line_count > 0:
+        matches.setdefault(filename, []).append(Range(start_line, line_count))
+  return matches
+
+
+def filter_by_extension(dictionary, allowed_extensions):
+  """Delete every key in `dictionary` that doesn't have an allowed extension.
+
+  `allowed_extensions` must be a collection of lowercase file extensions,
+  excluding the period."""
+  allowed_extensions = frozenset(allowed_extensions)
+  for filename in list(dictionary.keys()):
+    base_ext = filename.rsplit('.', 1)
+    if len(base_ext) == 1 and '' in allowed_extensions:
+        continue
+    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
+      del dictionary[filename]
+
+
+def cd_to_toplevel():
+  """Change to the top level of the git repository."""
+  toplevel = run('git', 'rev-parse', '--show-toplevel')
+  os.chdir(toplevel)
+
+
+def create_tree_from_workdir(filenames):
+  """Create a new git tree with the given files from the working directory.
+
+  Returns the object ID (SHA-1) of the created tree."""
+  return create_tree(filenames, '--stdin')
+
+
+def run_clang_format_and_save_to_tree(changed_lines, revision=None,
+                                      binary='clang-format', style=None):
+  """Run clang-format on each file and save the result to a git tree.
+
+  Returns the object ID (SHA-1) of the created tree."""
+  def iteritems(container):
+      try:
+          return container.iteritems() # Python 2
+      except AttributeError:
+          return container.items() # Python 3
+  def index_info_generator():
+    for filename, line_ranges in iteritems(changed_lines):
+      if revision:
+        git_metadata_cmd = ['git', 'ls-tree',
+                            '%s:%s' % (revision, os.path.dirname(filename)),
+                            os.path.basename(filename)]
+        git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
+                                        stdout=subprocess.PIPE)
+        stdout = git_metadata.communicate()[0]
+        mode = oct(int(stdout.split()[0], 8))
+      else:
+        mode = oct(os.stat(filename).st_mode)
+      # Adjust python3 octal format so that it matches what git expects
+      if mode.startswith('0o'):
+          mode = '0' + mode[2:]
+      blob_id = clang_format_to_blob(filename, line_ranges,
+                                     revision=revision,
+                                     binary=binary,
+                                     style=style)
+      yield '%s %s\t%s' % (mode, blob_id, filename)
+  return create_tree(index_info_generator(), '--index-info')
+
+
+def create_tree(input_lines, mode):
+  """Create a tree object from the given input.
+
+  If mode is '--stdin', it must be a list of filenames.  If mode is
+  '--index-info' is must be a list of values suitable for "git update-index
+  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode
+  is invalid."""
+  assert mode in ('--stdin', '--index-info')
+  cmd = ['git', 'update-index', '--add', '-z', mode]
+  with temporary_index_file():
+    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+    for line in input_lines:
+      p.stdin.write(to_bytes('%s\0' % line))
+    p.stdin.close()
+    if p.wait() != 0:
+      die('`%s` failed' % ' '.join(cmd))
+    tree_id = run('git', 'write-tree')
+    return tree_id
+
+
+def clang_format_to_blob(filename, line_ranges, revision=None,
+                         binary='clang-format', style=None):
+  """Run clang-format on the given file and save the result to a git blob.
+
+  Runs on the file in `revision` if not None, or on the file in the working
+  directory if `revision` is None.
+
+  Returns the object ID (SHA-1) of the created blob."""
+  clang_format_cmd = [binary]
+  if style:
+    clang_format_cmd.extend(['-style='+style])
+  clang_format_cmd.extend([
+      '-lines=%s:%s' % (start_line, start_line+line_count-1)
+      for start_line, line_count in line_ranges])
+  if revision:
+    clang_format_cmd.extend(['-assume-filename='+filename])
+    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
+    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)
+    git_show.stdin.close()
+    clang_format_stdin = git_show.stdout
+  else:
+    clang_format_cmd.extend([filename])
+    git_show = None
+    clang_format_stdin = subprocess.PIPE
+  try:
+    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
+                                    stdout=subprocess.PIPE)
+    if clang_format_stdin == subprocess.PIPE:
+      clang_format_stdin = clang_format.stdin
+  except OSError as e:
+    if e.errno == errno.ENOENT:
+      die('cannot find executable "%s"' % binary)
+    else:
+      raise
+  clang_format_stdin.close()
+  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
+  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
+                                 stdout=subprocess.PIPE)
+  clang_format.stdout.close()
+  stdout = hash_object.communicate()[0]
+  if hash_object.returncode != 0:
+    die('`%s` failed' % ' '.join(hash_object_cmd))
+  if clang_format.wait() != 0:
+    die('`%s` failed' % ' '.join(clang_format_cmd))
+  if git_show and git_show.wait() != 0:
+    die('`%s` failed' % ' '.join(git_show_cmd))
+  return convert_string(stdout).rstrip('\r\n')
+
+
+@contextlib.contextmanager
+def temporary_index_file(tree=None):
+  """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
+  the file afterward."""
+  index_path = create_temporary_index(tree)
+  old_index_path = os.environ.get('GIT_INDEX_FILE')
+  os.environ['GIT_INDEX_FILE'] = index_path
+  try:
+    yield
+  finally:
+    if old_index_path is None:
+      del os.environ['GIT_INDEX_FILE']
+    else:
+      os.environ['GIT_INDEX_FILE'] = old_index_path
+    os.remove(index_path)
+
+
+def create_temporary_index(tree=None):
+  """Create a temporary index file and return the created file's path.
+
+  If `tree` is not None, use that as the tree to read in.  Otherwise, an
+  empty index is created."""
+  gitdir = run('git', 'rev-parse', '--git-dir')
+  path = os.path.join(gitdir, temp_index_basename)
+  if tree is None:
+    tree = '--empty'
+  run('git', 'read-tree', '--index-output='+path, tree)
+  return path
+
+
+def print_diff(old_tree, new_tree):
+  """Print the diff between the two trees to stdout."""
+  # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
+  # is expected to be viewed by the user, and only the former does nice things
+  # like color and pagination.
+  #
+  # We also only print modified files since `new_tree` only contains the files
+  # that were modified, so unmodified files would show as deleted without the
+  # filter.
+  subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
+                         '--'])
+
+
+def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
+  """Apply the changes in `new_tree` to the working directory.
+
+  Bails if there are local changes in those files and not `force`.  If
+  `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
+  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
+                      '--name-only', old_tree,
+                      new_tree).rstrip('\0').split('\0')
+  if not force:
+    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
+    if unstaged_files:
+      print('The following files would be modified but '
+                'have unstaged changes:', file=sys.stderr)
+      print(unstaged_files, file=sys.stderr)
+      print('Please commit, stage, or stash them first.', file=sys.stderr)
+      sys.exit(2)
+  if patch_mode:
+    # In patch mode, we could just as well create an index from the new tree
+    # and checkout from that, but then the user will be presented with a
+    # message saying "Discard ... from worktree".  Instead, we use the old
+    # tree as the index and checkout from new_tree, which gives the slightly
+    # better message, "Apply ... to index and worktree".  This is not quite
+    # right, since it won't be applied to the user's index, but oh well.
+    with temporary_index_file(old_tree):
+      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
+    index_tree = old_tree
+  else:
+    with temporary_index_file(new_tree):
+      run('git', 'checkout-index', '-a', '-f')
+  return changed_files
+
+
+def run(*args, **kwargs):
+  stdin = kwargs.pop('stdin', '')
+  verbose = kwargs.pop('verbose', True)
+  strip = kwargs.pop('strip', True)
+  for name in kwargs:
+    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
+  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                       stdin=subprocess.PIPE)
+  stdout, stderr = p.communicate(input=stdin)
+
+  stdout = convert_string(stdout)
+  stderr = convert_string(stderr)
+
+  if p.returncode == 0:
+    if stderr:
+      if verbose:
+        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
+      print(stderr.rstrip(), file=sys.stderr)
+    if strip:
+      stdout = stdout.rstrip('\r\n')
+    return stdout
+  if verbose:
+    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
+  if stderr:
+    print(stderr.rstrip(), file=sys.stderr)
+  sys.exit(2)
+
+
+def die(message):
+  print('error:', message, file=sys.stderr)
+  sys.exit(2)
+
+
+def to_bytes(str_input):
+    # Encode to UTF-8 to get binary data.
+    if isinstance(str_input, bytes):
+        return str_input
+    return str_input.encode('utf-8')
+
+
+def to_string(bytes_input):
+    if isinstance(bytes_input, str):
+        return bytes_input
+    return bytes_input.encode('utf-8')
+
+
+def convert_string(bytes_input):
+    try:
+        return to_string(bytes_input.decode('utf-8'))
+    except AttributeError: # 'str' object has no attribute 'decode'.
+        return str(bytes_input)
+    except UnicodeError:
+        return str(bytes_input)
+
+if __name__ == '__main__':
+  main()