You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/22 16:51:30 UTC

arrow git commit: ARROW-28: Adding google's benchmark library to the toolchain

Repository: arrow
Updated Branches:
  refs/heads/master 016b92bcc -> 4ec034bbe


ARROW-28: Adding google's benchmark library to the toolchain

This isn't yet complete, but before I go further I think it's worth asking some questions about people's preferences:

1.  It seems that the build third-party script is setting up an install directory that it is not making use of.   Do we want to keep this functionality and start adding new libraries to be placed there?  The gtest component of the tool-chain assumes it is in its own location, and this is how I patterned the google benchmark integration.

2.  Do we want to couple unit test builds with benchmark builds?  I am currently aiming for having them decoupled and having benchmarks off by default.

3.  I am not familiar with the Darwin/mac build environment and it is not clear if the CXX flags are required universally.  (I need to fix it anyways to move -DGTEST_USE_OWN_TR1_TUPLE=1 back to be gtest only).  Travis-ci might provide the answer.

4.  Any other basic features in the benchmark toolchain people would like to see as part of this PR?  Wes mentioned starting to create benchmarking tools lib, but I think that likely belongs in a separate PR.

Author: Micah Kornfield <em...@gmail.com>

Closes #29 from emkornfield/emk_add_benchmark and squashes the following commits:

dbd4e71 [Micah Kornfield] only run unittests is travis
ab21150 [Micah Kornfield] Enable benchmarks in cpp toolchain
40847ee [Micah Kornfield] WIP-Adding google's benchmark library to the toolchain


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4ec034bb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4ec034bb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4ec034bb

Branch: refs/heads/master
Commit: 4ec034bbe18bd961a4bac64f2e25dba0472c28c9
Parents: 016b92b
Author: Micah Kornfield <em...@gmail.com>
Authored: Tue Mar 22 08:51:23 2016 -0700
Committer: Wes McKinney <we...@cloudera.com>
Committed: Tue Mar 22 08:51:23 2016 -0700

----------------------------------------------------------------------
 ci/travis_before_script_cpp.sh          |   2 +-
 ci/travis_script_cpp.sh                 |   4 +-
 cpp/CMakeLists.txt                      |  88 ++++++++++++++-
 cpp/README.md                           |  23 +++-
 cpp/build-support/run-test.sh           | 160 ++++++++++++++++-----------
 cpp/cmake_modules/FindGBenchmark.cmake  |  88 +++++++++++++++
 cpp/setup_build_env.sh                  |   1 +
 cpp/src/arrow/table/CMakeLists.txt      |   2 +
 cpp/src/arrow/table/column-benchmark.cc |  55 +++++++++
 cpp/src/arrow/util/CMakeLists.txt       |  14 +++
 cpp/src/arrow/util/benchmark_main.cc    |  24 ++++
 cpp/thirdparty/build_thirdparty.sh      |  20 +++-
 cpp/thirdparty/download_thirdparty.sh   |   6 +
 cpp/thirdparty/versions.sh              |   4 +
 14 files changed, 415 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/ci/travis_before_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 4d5bef8..49dcc39 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -19,7 +19,7 @@ echo $GTEST_HOME
 
 : ${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}
 
-cmake -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+cmake -DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
 make -j4
 make install
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/ci/travis_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
index 3e843dd..d96b98f 100755
--- a/ci/travis_script_cpp.sh
+++ b/ci/travis_script_cpp.sh
@@ -9,9 +9,9 @@ pushd $CPP_BUILD_DIR
 make lint
 
 if [ $TRAVIS_OS_NAME == "linux" ]; then
-  valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest
+  valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest -L unittest
 else
-  ctest
+  ctest -L unittest
 fi
 
 popd

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index f5f6038..268c1d1 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -55,12 +55,21 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(ARROW_BUILD_TESTS
     "Build the Arrow googletest unit tests"
     ON)
+
+  option(ARROW_BUILD_BENCHMARKS
+    "Build the Arrow micro benchmarks"
+    OFF)
+
 endif()
 
 if(NOT ARROW_BUILD_TESTS)
   set(NO_TESTS 1)
 endif()
 
+if(NOT ARROW_BUILD_BENCHMARKS)
+  set(NO_BENCHMARKS 1)
+endif()
+
 
 ############################################################
 # Compiler flags
@@ -251,9 +260,63 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 include_directories(src)
 
 ############################################################
-# Testing
+# Benchmarking 
 ############################################################
+# Add a new micro benchmark, with or without an executable that should be built.
+# If benchmarks are enabled then they will be run alongside unit tests with ctest.
+# Use 'make runbenchmark' or 'make unittest' to build/run only the benchmarks or
+# only the unit tests, respectively.
+#
+# REL_BENCHMARK_NAME is the name of the benchmark app. It may be a single component
+# (e.g. monotime-benchmark) or contain additional components (e.g.
+# net/net_util-benchmark). Either way, the last component must be a globally
+# unique name.  
+
+# The benchmark will be registered as a unit test with ctest with a label
+# of 'benchmark'.
+#
+# Arguments after the test name will be passed to set_tests_properties().
+function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
+  if(NO_BENCHMARKS)
+    return()
+  endif()
+  get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
+   
+  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
+    # This benchmark has a corresponding .cc file, set it up as an executable.
+    set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}")
+    add_executable(${BENCHMARK_NAME} "${REL_BENCHMARK_NAME}.cc")
+    target_link_libraries(${BENCHMARK_NAME} ${ARROW_BENCHMARK_LINK_LIBS})
+    add_dependencies(runbenchmark ${BENCHMARK_NAME})
+    set(NO_COLOR "--color_print=false")
+  else()
+    # No executable, just invoke the benchmark (probably a script) directly.
+    set(BENCHMARK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME})
+    set(NO_COLOR "")
+  endif()
+  
+  add_test(${BENCHMARK_NAME}
+    ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR})
+  set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark")
+  if(ARGN)
+    set_tests_properties(${BENCHMARK_NAME} PROPERTIES ${ARGN})
+  endif()
+endfunction()
+
+# Wrapper for add_dependencies() that is a no-op under NO_BENCHMARKS; the target is REL_BENCHMARK_NAME's last component and ARGN lists its dependencies.
+function(ADD_ARROW_BENCHMARK_DEPENDENCIES REL_BENCHMARK_NAME)
+  if(NO_BENCHMARKS)
+    return()
+  endif()
+  get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
 
+  add_dependencies(${BENCHMARK_NAME} ${ARGN})
+endfunction()
+
+
+############################################################
+# Testing
+############################################################
 # Add a new test case, with or without an executable that should be built.
 #
 # REL_TEST_NAME is the name of the test. It may be a single component
@@ -261,6 +324,9 @@ include_directories(src)
 # net/net_util-test). Either way, the last component must be a globally
 # unique name.
 #
+# The unit test is added with a label of "unittest" to support filtering with
+# ctest.
+#
 # Arguments after the test name will be passed to set_tests_properties().
 function(ADD_ARROW_TEST REL_TEST_NAME)
   if(NO_TESTS)
@@ -273,13 +339,15 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
     set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
     add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
     target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
+    add_dependencies(unittest ${TEST_NAME})
   else()
     # No executable, just invoke the test (probably a script) directly.
     set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
   endif()
 
   add_test(${TEST_NAME}
-    ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
+    ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
+  set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
   if(ARGN)
     set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
   endif()
@@ -335,13 +403,28 @@ if ("$ENV{GTEST_HOME}" STREQUAL "")
   set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
 endif()
 
+## Google Benchmark (default root; FindGBenchmark.cmake reads GBenchmark_HOME when the env var is unset)
+if ("$ENV{GBENCHMARK_HOME}" STREQUAL "")
+  set(GBenchmark_HOME ${THIRDPARTY_DIR}/installed)
+endif()
+
+
 if(ARROW_BUILD_TESTS)
+  add_custom_target(unittest ctest -L unittest)
   find_package(GTest REQUIRED)
   include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
   ADD_THIRDPARTY_LIB(gtest
     STATIC_LIB ${GTEST_STATIC_LIB})
 endif()
 
+if(ARROW_BUILD_BENCHMARKS)
+  add_custom_target(runbenchmark ctest -L benchmark)
+  find_package(GBenchmark REQUIRED)
+  include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(benchmark
+    STATIC_LIB ${GBENCHMARK_STATIC_LIB})
+endif()
+
 ## Google PerfTools
 ##
 ## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
@@ -366,6 +449,7 @@ endif()
 ############################################################
 set(ARROW_MIN_TEST_LIBS arrow arrow_test_main ${ARROW_BASE_LIBS})
 set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
+set(ARROW_BENCHMARK_LINK_LIBS arrow arrow_benchmark_main ${ARROW_BASE_LIBS})
 
 ############################################################
 # "make ctags" target

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/README.md
----------------------------------------------------------------------
diff --git a/cpp/README.md b/cpp/README.md
index 378dc4e..542cce4 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -29,16 +29,29 @@ Simple debug build:
     mkdir debug
     cd debug
     cmake ..
-    make
-    ctest
+    make unittest
 
 Simple release build:
 
     mkdir release
     cd release
     cmake .. -DCMAKE_BUILD_TYPE=Release
-    make
-    ctest
+    make unittest
+
+Detailed unit test logs will be placed in the build directory under `build/test-logs`.
+
+### Building/Running benchmarks
+
+Follow the directions for simple build except run cmake
+with the `-DARROW_BUILD_BENCHMARKS` parameter set correctly:
+
+    cmake -DARROW_BUILD_BENCHMARKS=ON ..
+
+and instead of make unittest run either `make; ctest` to run both unit tests 
+and benchmarks or `make runbenchmark` to run only the benchmark tests.
+
+Benchmark logs will be placed in the build directory under `build/benchmark-logs`.
+
 
 ### Third-party environment variables
 
@@ -46,3 +59,5 @@ To set up your own specific build toolchain, here are the relevant environment
 variables
 
 * Googletest: `GTEST_HOME` (only required to build the unit tests)
+* Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks)
+

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/build-support/run-test.sh
----------------------------------------------------------------------
diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh
index b203913..0e628e2 100755
--- a/cpp/build-support/run-test.sh
+++ b/cpp/build-support/run-test.sh
@@ -16,24 +16,23 @@
 # Script which wraps running a test and redirects its output to a
 # test log directory.
 #
-# If KUDU_COMPRESS_TEST_OUTPUT is non-empty, then the logs will be
-# gzip-compressed while they are written.
+# Arguments:
+#    $1 - Base path for logs/artifacts.
+#    $2 - type of test (e.g. test or benchmark)
+#    $3 - path to executable
+#    $ARGN - arguments for executable
 #
-# If KUDU_FLAKY_TEST_ATTEMPTS is non-zero, and the test being run matches
-# one of the lines in the file KUDU_FLAKY_TEST_LIST, then the test will
-# be retried on failure up to the specified number of times. This can be
-# used in the gerrit workflow to prevent annoying false -1s caused by
-# tests that are known to be flaky in master.
-#
-# If KUDU_REPORT_TEST_RESULTS is non-zero, then tests are reported to the
-# central test server.
 
+OUTPUT_ROOT=$1
+shift
 ROOT=$(cd $(dirname $BASH_SOURCE)/..; pwd)
 
-TEST_LOGDIR=$ROOT/build/test-logs
+TEST_LOGDIR=$OUTPUT_ROOT/build/$1-logs
 mkdir -p $TEST_LOGDIR
 
-TEST_DEBUGDIR=$ROOT/build/test-debug
+RUN_TYPE=$1
+shift
+TEST_DEBUGDIR=$OUTPUT_ROOT/build/$RUN_TYPE-debug
 mkdir -p $TEST_DEBUGDIR
 
 TEST_DIRNAME=$(cd $(dirname $1); pwd)
@@ -43,7 +42,7 @@ TEST_EXECUTABLE="$TEST_DIRNAME/$TEST_FILENAME"
 TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//') # Remove path and extension (if any).
 
 # We run each test in its own subdir to avoid core file related races.
-TEST_WORKDIR=$ROOT/build/test-work/$TEST_NAME
+TEST_WORKDIR=$OUTPUT_ROOT/build/test-work/$TEST_NAME
 mkdir -p $TEST_WORKDIR
 pushd $TEST_WORKDIR >/dev/null || exit 1
 rm -f *
@@ -61,55 +60,49 @@ rm -f $LOGFILE $LOGFILE.gz
 
 pipe_cmd=cat
 
-# Configure TSAN (ignored if this isn't a TSAN build).
-#
-# Deadlock detection (new in clang 3.5) is disabled because:
-# 1. The clang 3.5 deadlock detector crashes in some unit tests. It
-#    needs compiler-rt commits c4c3dfd, 9a8efe3, and possibly others.
-# 2. Many unit tests report lock-order-inversion warnings; they should be
-#    fixed before reenabling the detector.
-TSAN_OPTIONS="$TSAN_OPTIONS detect_deadlocks=0"
-TSAN_OPTIONS="$TSAN_OPTIONS suppressions=$ROOT/build-support/tsan-suppressions.txt"
-TSAN_OPTIONS="$TSAN_OPTIONS history_size=7"
-export TSAN_OPTIONS
-
-# Enable leak detection even under LLVM 3.4, where it was disabled by default.
-# This flag only takes effect when running an ASAN build.
-ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1"
-export ASAN_OPTIONS
-
-# Set up suppressions for LeakSanitizer
-LSAN_OPTIONS="$LSAN_OPTIONS suppressions=$ROOT/build-support/lsan-suppressions.txt"
-export LSAN_OPTIONS
-
-# Suppressions require symbolization. We'll default to using the symbolizer in
-# thirdparty.
-if [ -z "$ASAN_SYMBOLIZER_PATH" ]; then
-  export ASAN_SYMBOLIZER_PATH=$(find $NATIVE_TOOLCHAIN/llvm-3.7.0/bin -name llvm-symbolizer)
-fi
-
 # Allow for collecting core dumps.
 ARROW_TEST_ULIMIT_CORE=${ARROW_TEST_ULIMIT_CORE:-0}
 ulimit -c $ARROW_TEST_ULIMIT_CORE
 
-# Run the actual test.
-for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
-  if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
-    # If the test fails, the test output may or may not be left behind,
-    # depending on whether the test cleaned up or exited immediately. Either
-    # way we need to clean it up. We do this by comparing the data directory
-    # contents before and after the test runs, and deleting anything new.
-    #
-    # The comm program requires that its two inputs be sorted.
-    TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
+
+function setup_sanitizers() {
+  # Exports the TSAN/ASAN/LSAN environment variables that configure how the sanitizers behave during run_test.
+
+  # Configure TSAN (ignored if this isn't a TSAN build).
+  #
+  # Deadlock detection (new in clang 3.5) is disabled because:
+  # 1. The clang 3.5 deadlock detector crashes in some unit tests. It
+  #    needs compiler-rt commits c4c3dfd, 9a8efe3, and possibly others.
+  # 2. Many unit tests report lock-order-inversion warnings; they should be
+  #    fixed before reenabling the detector.
+  TSAN_OPTIONS="$TSAN_OPTIONS detect_deadlocks=0"
+  TSAN_OPTIONS="$TSAN_OPTIONS suppressions=$ROOT/build-support/tsan-suppressions.txt"
+  TSAN_OPTIONS="$TSAN_OPTIONS history_size=7"
+  export TSAN_OPTIONS
+  
+  # Enable leak detection even under LLVM 3.4, where it was disabled by default.
+  # This flag only takes effect when running an ASAN build.
+  ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1"
+  export ASAN_OPTIONS
+  
+  # Set up suppressions for LeakSanitizer
+  LSAN_OPTIONS="$LSAN_OPTIONS suppressions=$ROOT/build-support/lsan-suppressions.txt"
+  export LSAN_OPTIONS
+  
+  # Suppressions require symbolization. We'll default to using the symbolizer in
+  # thirdparty.
+  if [ -z "$ASAN_SYMBOLIZER_PATH" ]; then
+    export ASAN_SYMBOLIZER_PATH=$(find $NATIVE_TOOLCHAIN/llvm-3.7.0/bin -name llvm-symbolizer)
   fi
+}
+
+function run_test() {
+  # Run gtest style tests with sanitizers if they are setup appropriately.
 
   # gtest won't overwrite old junit test files, resulting in a build failure
   # even when retries are successful.
   rm -f $XMLFILE
 
-  echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
-    "(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
   $TEST_EXECUTABLE "$@" 2>&1 \
     | $ROOT/build-support/asan_symbolize.py \
     | c++filt \
@@ -131,6 +124,46 @@ for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
     STATUS=1
     rm -f $XMLFILE
   fi
+}
+
+function post_process_tests() {
+  # If we have a LeakSanitizer report, and XML reporting is configured, add a new test
+  # case result to the XML file for the leak report. Otherwise Jenkins won't show
+  # us which tests had LSAN errors.
+  if zgrep --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then
+      echo Test had memory leaks. Editing XML
+      perl -p -i -e '
+      if (m#</testsuite>#) {
+        print "<testcase name=\"LeakSanitizer\" status=\"run\" classname=\"LSAN\">\n";
+        print "  <failure message=\"LeakSanitizer failed\" type=\"\">\n";
+        print "    See txt log file for details\n";
+        print "  </failure>\n";
+        print "</testcase>\n";
+      }' $XMLFILE
+  fi
+}
+
+function run_other() {
+  # Generic run function for test-like executables that aren't gtest; sets STATUS to the executable's exit code.
+  $TEST_EXECUTABLE "$@" 2>&1 | $pipe_cmd > $LOGFILE
+  STATUS=${PIPESTATUS[0]}  # plain $? would be $pipe_cmd's status and hide a failing executable
+}
+
+if [ $RUN_TYPE = "test" ]; then
+    setup_sanitizers
+fi
+
+# Run the actual test.
+for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
+  if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
+    # If the test fails, the test output may or may not be left behind,
+    # depending on whether the test cleaned up or exited immediately. Either
+    # way we need to clean it up. We do this by comparing the data directory
+    # contents before and after the test runs, and deleting anything new.
+    #
+    # The comm program requires that its two inputs be sorted.
+    TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
+  fi
 
   if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
     # Now delete any new test output.
@@ -150,7 +183,13 @@ for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
       fi
     done
   fi
-
+  echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
+    "(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
+  if [ $RUN_TYPE = "test" ]; then
+    run_test $*
+  else
+    run_other $*
+  fi
   if [ "$STATUS" -eq "0" ]; then
     break
   elif [ "$ATTEMPT_NUMBER" -lt "$TEST_EXECUTION_ATTEMPTS" ]; then
@@ -159,19 +198,8 @@ for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
   fi
 done
 
-# If we have a LeakSanitizer report, and XML reporting is configured, add a new test
-# case result to the XML file for the leak report. Otherwise Jenkins won't show
-# us which tests had LSAN errors.
-if zgrep --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then
-    echo Test had memory leaks. Editing XML
-    perl -p -i -e '
-    if (m#</testsuite>#) {
-      print "<testcase name=\"LeakSanitizer\" status=\"run\" classname=\"LSAN\">\n";
-      print "  <failure message=\"LeakSanitizer failed\" type=\"\">\n";
-      print "    See txt log file for details\n";
-      print "  </failure>\n";
-      print "</testcase>\n";
-    }' $XMLFILE
+if [ $RUN_TYPE = "test" ]; then	
+  post_process_tests
 fi
 
 # Capture and compress core file and binary.

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/cmake_modules/FindGBenchmark.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindGBenchmark.cmake b/cpp/cmake_modules/FindGBenchmark.cmake
new file mode 100644
index 0000000..3e46a60
--- /dev/null
+++ b/cpp/cmake_modules/FindGBenchmark.cmake
@@ -0,0 +1,88 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Google benchmark headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(GBenchmark)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  GBenchmark_HOME - When set, this path is inspected instead of standard library
+#                    locations as the root of the benchmark installation.
+#                    The environment variable GBENCHMARK_HOME overrides this variable.
+#
+# This module defines
+#  GBENCHMARK_INCLUDE_DIR, directory containing benchmark header directory
+#  GBENCHMARK_LIBS, directory containing benchmark libraries
+#  GBENCHMARK_STATIC_LIB, path to libbenchmark.a
+#  GBENCHMARK_FOUND, whether gbenchmark has been found
+
+if( NOT "$ENV{GBENCHMARK_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "$ENV{GBENCHMARK_HOME}" _native_path )
+    list( APPEND _gbenchmark_roots ${_native_path} )
+elseif ( GBenchmark_HOME )
+    list( APPEND _gbenchmark_roots ${GBenchmark_HOME} )
+endif()
+
+# Search only the explicit roots when any were given; otherwise use CMake's default search paths.
+if ( _gbenchmark_roots )
+    find_path( GBENCHMARK_INCLUDE_DIR NAMES benchmark/benchmark.h 
+        PATHS ${_gbenchmark_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "include" )
+    find_library( GBENCHMARK_LIBRARIES NAMES benchmark
+        PATHS ${_gbenchmark_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib" )
+else ()
+    find_path( GBENCHMARK_INCLUDE_DIR NAMES benchmark/benchmark.h )
+    find_library( GBENCHMARK_LIBRARIES NAMES benchmark )
+endif ()
+
+
+if (GBENCHMARK_INCLUDE_DIR AND GBENCHMARK_LIBRARIES)
+  set(GBENCHMARK_FOUND TRUE)
+  get_filename_component( GBENCHMARK_LIBS ${GBENCHMARK_LIBRARIES} PATH )
+  set(GBENCHMARK_LIB_NAME libbenchmark)
+  set(GBENCHMARK_STATIC_LIB ${GBENCHMARK_LIBS}/${GBENCHMARK_LIB_NAME}.a)
+else ()
+  set(GBENCHMARK_FOUND FALSE)
+endif ()
+
+if (GBENCHMARK_FOUND)  # report the outcome unless GBenchmark_FIND_QUIETLY is set
+  if (NOT GBenchmark_FIND_QUIETLY)
+    message(STATUS "Found the GBenchmark library: ${GBENCHMARK_LIBRARIES}")
+  endif ()
+else ()
+  if (NOT GBenchmark_FIND_QUIETLY)
+    set(GBENCHMARK_ERR_MSG "Could not find the GBenchmark library. Looked in")
+    if ( _gbenchmark_roots )
+      set(GBENCHMARK_ERR_MSG "${GBENCHMARK_ERR_MSG} ${_gbenchmark_roots}.")
+    else ()
+      set(GBENCHMARK_ERR_MSG "${GBENCHMARK_ERR_MSG} system search paths.")
+    endif ()
+    if (GBenchmark_FIND_REQUIRED)
+      message(FATAL_ERROR "${GBENCHMARK_ERR_MSG}")
+    else ()
+      message(STATUS "${GBENCHMARK_ERR_MSG}")
+    endif ()
+  endif ()
+endif ()
+
+mark_as_advanced(
+  GBENCHMARK_INCLUDE_DIR
+  GBENCHMARK_LIBS
+  GBENCHMARK_LIBRARIES
+  GBENCHMARK_STATIC_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/setup_build_env.sh
----------------------------------------------------------------------
diff --git a/cpp/setup_build_env.sh b/cpp/setup_build_env.sh
index 1a33fe3..04688e7 100755
--- a/cpp/setup_build_env.sh
+++ b/cpp/setup_build_env.sh
@@ -7,5 +7,6 @@ SOURCE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
 source thirdparty/versions.sh
 
 export GTEST_HOME=$SOURCE_DIR/thirdparty/$GTEST_BASEDIR
+export GBENCHMARK_HOME=$SOURCE_DIR/thirdparty/installed
 
 echo "Build env initialized"

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
index 26d843d..d9f00e7 100644
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ b/cpp/src/arrow/table/CMakeLists.txt
@@ -29,3 +29,5 @@ install(FILES
 ADD_ARROW_TEST(column-test)
 ADD_ARROW_TEST(schema-test)
 ADD_ARROW_TEST(table-test)
+
+ADD_ARROW_BENCHMARK(column-benchmark)

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/src/arrow/table/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-benchmark.cc b/cpp/src/arrow/table/column-benchmark.cc
new file mode 100644
index 0000000..c01146d
--- /dev/null
+++ b/cpp/src/arrow/table/column-benchmark.cc
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/test-util.h"
+#include "arrow/table/test-common.h"
+#include "arrow/types/integer.h"
+#include "arrow/util/memory-pool.h"
+
+namespace arrow {
+namespace {
+  template <typename ArrayType>  // array of `length` slots built with the given `null_count`
+  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+    auto pool = GetDefaultMemoryPool();
+    auto data = std::make_shared<PoolBuffer>(pool);
+    auto nulls = std::make_shared<PoolBuffer>(pool);
+    data->Resize(length * sizeof(typename ArrayType::value_type));
+    nulls->Resize(util::bytes_for_bits(length));
+    return std::make_shared<ArrayType>(length, data, null_count, nulls);
+  }
+}  // anonymous namespace
+
+
+static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
+  ArrayVector arrays;
+  for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
+    arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+  }
+  const auto INT32 = std::make_shared<Int32Type>();
+  const auto field = std::make_shared<Field>("c0", INT32);
+  std::unique_ptr<Column> column;
+  while (state.KeepRunning()) {
+    column.reset(new Column(field, arrays));
+  }
+}
+
+BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index d8e2f98..fed05e3 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -51,6 +51,20 @@ if (ARROW_BUILD_TESTS)
   endif()
 endif()
 
+if (ARROW_BUILD_BENCHMARKS)
+  add_library(arrow_benchmark_main benchmark_main.cc)
+  if (APPLE)
+    target_link_libraries(arrow_benchmark_main
+      benchmark
+    )
+  else()
+	  target_link_libraries(arrow_benchmark_main
+      benchmark 
+      pthread
+	  )
+  endif()
+endif()
+
 ADD_ARROW_TEST(bit-util-test)
 ADD_ARROW_TEST(buffer-test)
 ADD_ARROW_TEST(memory-pool-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/src/arrow/util/benchmark_main.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/benchmark_main.cc b/cpp/src/arrow/util/benchmark_main.cc
new file mode 100644
index 0000000..c9739af
--- /dev/null
+++ b/cpp/src/arrow/util/benchmark_main.cc
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  return 0;
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/build_thirdparty.sh b/cpp/thirdparty/build_thirdparty.sh
index beb2488..294737c 100755
--- a/cpp/thirdparty/build_thirdparty.sh
+++ b/cpp/thirdparty/build_thirdparty.sh
@@ -16,6 +16,7 @@ else
   for arg in "$*"; do
     case $arg in
       "gtest")      F_GTEST=1 ;;
+      "gbenchmark")      F_GBENCHMARK=1 ;;
       *)            echo "Unknown module: $arg"; exit 1 ;;
     esac
   done
@@ -47,13 +48,15 @@ export PATH=$PREFIX/bin:$PATH
 type cmake >/dev/null 2>&1 || { echo >&2 "cmake not installed.  Aborting."; exit 1; }
 type make >/dev/null 2>&1 || { echo >&2 "make not installed.  Aborting."; exit 1; }
 
+STANDARD_DARWIN_FLAGS="-std=c++11 -stdlib=libc++"
+
 # build googletest
 GOOGLETEST_ERROR="failed for googletest!"
 if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then
   cd $TP_DIR/$GTEST_BASEDIR
 
   if [[ "$OSTYPE" == "darwin"* ]]; then
-    CXXFLAGS=-fPIC cmake -DCMAKE_CXX_FLAGS="-std=c++11 -stdlib=libc++ -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes" || { echo "cmake $GOOGLETEST_ERROR" ; exit  1; }
+    CXXFLAGS=-fPIC cmake -DCMAKE_CXX_FLAGS="$STANDARD_DARWIN_FLAGS -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes" || { echo "cmake $GOOGLETEST_ERROR" ; exit  1; }
   else
     CXXFLAGS=-fPIC cmake . || { echo "cmake $GOOGLETEST_ERROR"; exit  1; }
   fi
@@ -61,5 +64,20 @@ if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then
   make VERBOSE=1 || { echo "Make $GOOGLETEST_ERROR" ; exit  1; }
 fi
 
+# build google benchmark
+GBENCHMARK_ERROR="failed for google benchmark"
+if [ -n "$F_ALL" -o -n "$F_GBENCHMARK" ]; then
+  cd $TP_DIR/$GBENCHMARK_BASEDIR
+
+  CMAKE_CXX_FLAGS="--std=c++11"
+  if [[ "$OSTYPE" == "darwin"* ]]; then
+    CMAKE_CXX_FLAGS=$STANDARD_DARWIN_FLAGS
+  fi
+  cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_CXX_FLAGS="-fPIC $CMAKE_CXX_FLAGS" . || { echo "cmake $GBENCHMARK_ERROR" ; exit 1; }
+
+  make VERBOSE=1 install || { echo "make $GBENCHMARK_ERROR" ; exit 1; }
+fi
+
+
 echo "---------------------"
 echo "Thirdparty dependencies built and installed into $PREFIX successfully"

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/download_thirdparty.sh b/cpp/thirdparty/download_thirdparty.sh
index c18dd4d..d22c559 100755
--- a/cpp/thirdparty/download_thirdparty.sh
+++ b/cpp/thirdparty/download_thirdparty.sh
@@ -19,3 +19,9 @@ if [ ! -d ${GTEST_BASEDIR} ]; then
   echo "Fetching gtest"
   download_extract_and_cleanup $GTEST_URL
 fi
+
+echo ${GBENCHMARK_BASEDIR}
+if [ ! -d ${GBENCHMARK_BASEDIR} ]; then
+  echo "Fetching google benchmark"
+  download_extract_and_cleanup $GBENCHMARK_URL
+fi

http://git-wip-us.apache.org/repos/asf/arrow/blob/4ec034bb/cpp/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/versions.sh b/cpp/thirdparty/versions.sh
index 12ad56e..9cfc7cd 100755
--- a/cpp/thirdparty/versions.sh
+++ b/cpp/thirdparty/versions.sh
@@ -1,3 +1,7 @@
 GTEST_VERSION=1.7.0
 GTEST_URL="https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
 GTEST_BASEDIR=googletest-release-$GTEST_VERSION
+
+GBENCHMARK_VERSION=1.0.0
+GBENCHMARK_URL="https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
+GBENCHMARK_BASEDIR=benchmark-$GBENCHMARK_VERSION