You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/05/01 13:34:24 UTC
[arrow] branch master updated: ARROW-2485: Re-write of run_clang_format.py, such that it outputs the diffs of th…
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f056ef0 ARROW-2485: Re-write of run_clang_format.py, such that it outputs the diffs of th…
f056ef0 is described below
commit f056ef0ceb83ede2d23854fdae4aedc16289c61f
Author: Joshua Storck <jo...@twosigma.com>
AuthorDate: Tue May 1 15:34:16 2018 +0200
ARROW-2485: Re-write of run_clang_format.py, such that it outputs the diffs of th…
…e original file with the formatted file so that it's easier to visualize what's wrong in the formatting
Author: Joshua Storck <jo...@twosigma.com>
Closes #1918 from joshuastorck/run_clang_format_rewrite and squashes the following commits:
7e8e48bf <Joshua Storck> Handling subprocess.check_output output in a uniform manner between Python 2 and Python 3. Also changing the code that outputs the 'Formatting {}' string to a join/map call instead of a loop
ca4bd571 <Joshua Storck> Re-write of run_clang_format.py, such that it outputs the diffs of the original file with the formatted file so that it's easier to visualize what's wrong in the formatting
---
cpp/CMakeLists.txt | 4 +-
cpp/build-support/run_clang_format.py | 135 +++++++++++++++++++++-------------
2 files changed, 85 insertions(+), 54 deletions(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index b913685..be940ea 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -488,7 +488,7 @@ endif (UNIX)
add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
${CLANG_FORMAT_BIN}
${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
- ${CMAKE_CURRENT_SOURCE_DIR}/src)
+ ${CMAKE_CURRENT_SOURCE_DIR}/src --fix)
# runs clang format and exits with a non-zero exit code if any files need to be reformatted
@@ -496,7 +496,7 @@ add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run_clang_format.py
${CLANG_FORMAT_BIN}
${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
- ${CMAKE_CURRENT_SOURCE_DIR}/src 1)
+ ${CMAKE_CURRENT_SOURCE_DIR}/src)
############################################################
# "make clang-tidy" and "make check-clang-tidy" targets
diff --git a/cpp/build-support/run_clang_format.py b/cpp/build-support/run_clang_format.py
index 6dec34b..229fa1e 100755
--- a/cpp/build-support/run_clang_format.py
+++ b/cpp/build-support/run_clang_format.py
@@ -16,63 +16,94 @@
# specific language governing permissions and limitations
# under the License.
+import argparse
+import codecs
+import difflib
import fnmatch
import os
import subprocess
import sys
-if len(sys.argv) < 4:
- sys.stderr.write("Usage: %s $CLANG_FORMAT_VERSION exclude_globs.txt "
- "$source_dir\n" %
- sys.argv[0])
- sys.exit(1)
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Runs clang format on all of the source "
+ "files. If --fix is specified, and compares the output "
+ "with the existing file, outputting a unifiied diff if "
+ "there are any necessary changes")
+ parser.add_argument("clang_format_binary",
+ help="Path to the clang-format binary")
+ parser.add_argument("exclude_globs",
+ help="Filename containing globs for files "
+ "that should be excluded from the checks")
+ parser.add_argument("source_dir",
+ help="Root directory of the source code")
+ parser.add_argument("--fix", default=False,
+ action="store_true",
+ help="If specified, will re-format the source "
+ "code instead of comparing the re-formatted "
+ "output, defaults to %(default)s")
-CLANG_FORMAT = sys.argv[1]
-EXCLUDE_GLOBS_FILENAME = sys.argv[2]
-SOURCE_DIR = sys.argv[3]
+ arguments = parser.parse_args()
-if len(sys.argv) > 4:
- CHECK_FORMAT = int(sys.argv[4]) == 1
-else:
- CHECK_FORMAT = False
+ formatted_filenames = []
+ exclude_globs = [line.strip() for line in open(arguments.exclude_globs)]
+ for directory, subdirs, filenames in os.walk(arguments.source_dir):
+ fullpaths = (os.path.join(directory, filename) for filename in filenames)
+ source_files = filter(lambda x: x.endswith(".h") or x.endswith(".cc"), fullpaths)
+ formatted_filenames.extend(
+ # Filter out files that match the globs in the globs file
+ [filename for filename in source_files
+ if not any((fnmatch.fnmatch(filename, exclude_glob)
+ for exclude_glob in exclude_globs))])
+
+ error = False
+ if arguments.fix:
+ # Print out each file on its own line, but run
+ # clang format once for all of the files
+ print("\n".join(map(lambda x: "Formatting {}".format(x),
+ formatted_filenames)))
+ subprocess.check_call([arguments.clang_format_binary,
+ "-i"] + formatted_filenames)
+ else:
+ for filename in formatted_filenames:
+ print("Checking {}".format(filename))
+ #
+ # Due to some incompatibilities between Python 2 and
+ # Python 3, there are some specific actions we take here
+ # to make sure the difflib.unified_diff call works.
+ #
+ # In Python 2, the call to subprocess.check_output return
+ # a 'str' type. In Python 3, however, the call returns a
+ # 'bytes' type unless the 'encoding' argument is
+ # specified. Unfortunately, the 'encoding' argument is not
+ # in the Python 2 API. We could do an if/else here based
+ # on the version of Python we are running, but it's more
+ # straightforward to read the file in binary and do utf-8
+ # conversion. In Python 2, it's just converting string
+ # types to unicode types, whereas in Python 3 it's
+ # converting bytes types to utf-8 encoded str types. This
+ # approach ensures that the arguments to
+ # difflib.unified_diff are acceptable string types in both
+ # Python 2 and Python 3.
+ with open(filename, "rb") as reader:
+ # Run clang-format and capture its output
+ formatted = subprocess.check_output(
+ [arguments.clang_format_binary,
+ filename])
+ formatted = codecs.decode(formatted, "utf-8")
+ # Read the original file
+ original = codecs.decode(reader.read(), "utf-8")
+ # Run the equivalent of diff -u
+ diff = list(difflib.unified_diff(
+ original.splitlines(True),
+ formatted.splitlines(True),
+ fromfile=filename,
+ tofile="{} (after clang format)".format(
+ filename)))
+ if diff:
+ # Print out the diff to stderr
+ error = True
+ print(",".join(map(str, map(type, diff))))
+ sys.stderr.writelines(diff)
-
-exclude_globs = [line.strip() for line in open(EXCLUDE_GLOBS_FILENAME, "r")]
-
-files_to_format = []
-matches = []
-for directory, subdirs, files in os.walk(SOURCE_DIR):
- for name in files:
- name = os.path.join(directory, name)
- if not (name.endswith('.h') or name.endswith('.cc')):
- continue
-
- excluded = False
- for g in exclude_globs:
- if fnmatch.fnmatch(name, g):
- excluded = True
- break
- if not excluded:
- files_to_format.append(name)
-
-if CHECK_FORMAT:
- output = subprocess.check_output([CLANG_FORMAT, '-output-replacements-xml']
- + files_to_format,
- stderr=subprocess.STDOUT).decode('utf8')
-
- to_fix = []
- for line in output.split('\n'):
- if 'offset' in line:
- to_fix.append(line)
-
- if len(to_fix) > 0:
- print("clang-format checks failed, run 'make format' to fix")
- sys.exit(-1)
-else:
- try:
- cmd = [CLANG_FORMAT, '-i'] + files_to_format
- subprocess.check_output(cmd, stderr=subprocess.STDOUT)
- except Exception as e:
- print(e)
- print(' '.join(cmd))
- raise
+ sys.exit(1 if error else 0)
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.