You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/01/06 14:57:38 UTC

arrow git commit: ARROW-456: Add jemalloc based MemoryPool

Repository: arrow
Updated Branches:
  refs/heads/master 320f5875e -> 5bf6ae49e


ARROW-456: Add jemalloc based MemoryPool

Runtimes of the `builder-benchmark`:

```
BM_BuildPrimitiveArrayNoNulls/repeats:3               901 ms        889 ms          1   576.196MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3               833 ms        829 ms          1     617.6MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3               825 ms        821 ms          1   623.855MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_mean          853 ms        846 ms          1   605.884MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev         34 ms         30 ms          0    21.147MB/s
BM_BuildVectorNoNulls/repeats:3                       712 ms        701 ms          1   729.866MB/s
BM_BuildVectorNoNulls/repeats:3                       671 ms        670 ms          1   764.464MB/s
BM_BuildVectorNoNulls/repeats:3                       688 ms        681 ms          1   751.285MB/s
BM_BuildVectorNoNulls/repeats:3_mean                  690 ms        684 ms          1   748.538MB/s
BM_BuildVectorNoNulls/repeats:3_stddev                 17 ms         13 ms          0   14.2578MB/s
```

With an aligned `Reallocate`, the jemalloc version is 50% faster and even outperforms `std::vector`:

```
BM_BuildPrimitiveArrayNoNulls/repeats:3               565 ms        559 ms          1   916.516MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3               540 ms        537 ms          1   952.727MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3               544 ms        543 ms          1   942.948MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_mean          550 ms        546 ms          1   937.397MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev         11 ms          9 ms          0   15.2949MB/s
```

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #270 from xhochy/ARROW-456 and squashes the following commits:

d3ce3bf [Uwe L. Korn] Zero arrays for now
831399d [Uwe L. Korn] cpplint #2
e6e251b [Uwe L. Korn] cpplint
52b3c76 [Uwe L. Korn] Add Reallocate implementation to PyArrowMemoryPool
113e650 [Uwe L. Korn] Add missing file
d331cd9 [Uwe L. Korn] Add tests for Reallocate
c2be086 [Uwe L. Korn] Add JEMALLOC_HOME to the Readme
bd47f51 [Uwe L. Korn] Add missing return value
5142ac3 [Uwe L. Korn] Don't use deprecated GBenchmark interfaces
b6bff98 [Uwe L. Korn] Add missing (win) include
6f08e19 [Uwe L. Korn] Don't build jemalloc on AppVeyor
834c3b2 [Uwe L. Korn] Add jemalloc to Travis builds
10c6839 [Uwe L. Korn] Implement Reallocate function
a17b313 [Uwe L. Korn] ARROW-456: C++: Add jemalloc based MemoryPool


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5bf6ae49
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5bf6ae49
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5bf6ae49

Branch: refs/heads/master
Commit: 5bf6ae49ec561eaaef823f0eb16ccca2d2ba7cf3
Parents: 320f587
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Jan 6 15:57:20 2017 +0100
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Fri Jan 6 15:57:20 2017 +0100

----------------------------------------------------------------------
 .travis.yml                                     |  1 +
 appveyor.yml                                    |  2 +-
 ci/travis_before_script_cpp.sh                  |  5 ++
 cpp/CMakeLists.txt                              | 30 ++++++-
 cpp/README.md                                   |  1 +
 cpp/cmake_modules/Findjemalloc.cmake            | 86 ++++++++++++++++++++
 cpp/src/arrow/CMakeLists.txt                    |  1 +
 cpp/src/arrow/buffer.cc                         |  6 +-
 cpp/src/arrow/builder-benchmark.cc              | 64 +++++++++++++++
 cpp/src/arrow/builder.cc                        |  1 +
 cpp/src/arrow/column-benchmark.cc               |  2 +-
 cpp/src/arrow/io/interfaces.cc                  |  4 +-
 cpp/src/arrow/io/io-file-test.cc                | 13 +++
 cpp/src/arrow/jemalloc/CMakeLists.txt           | 80 ++++++++++++++++++
 cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in     | 27 ++++++
 .../jemalloc/jemalloc-builder-benchmark.cc      | 47 +++++++++++
 .../arrow/jemalloc/jemalloc-memory_pool-test.cc | 51 ++++++++++++
 cpp/src/arrow/jemalloc/memory_pool.cc           | 74 +++++++++++++++++
 cpp/src/arrow/jemalloc/memory_pool.h            | 57 +++++++++++++
 cpp/src/arrow/jemalloc/symbols.map              | 30 +++++++
 cpp/src/arrow/memory_pool-test.cc               | 33 +++-----
 cpp/src/arrow/memory_pool-test.h                | 79 ++++++++++++++++++
 cpp/src/arrow/memory_pool.cc                    | 24 ++++++
 cpp/src/arrow/memory_pool.h                     |  1 +
 python/src/pyarrow/common.cc                    | 14 ++++
 25 files changed, 704 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 1634eba..e8d9104 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,7 @@ addons:
     - libboost-dev
     - libboost-filesystem-dev
     - libboost-system-dev
+    - libjemalloc-dev
 
 matrix:
   fast_finish: true

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/appveyor.yml
----------------------------------------------------------------------
diff --git a/appveyor.yml b/appveyor.yml
index 6747848..17362c9 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -32,7 +32,7 @@ build_script:
  - cd build
  # A lot of features are still deactivated as they do not build on Windows
  #  * gbenchmark doesn't build with MSVC
- - cmake -G "%GENERATOR%" -DARROW_BOOST_USE_SHARED=OFF -DARROW_IPC=OFF -DARROW_HDFS=OFF -DARROW_BUILD_BENCHMARKS=OFF ..
+ - cmake -G "%GENERATOR%" -DARROW_BOOST_USE_SHARED=OFF -DARROW_IPC=OFF -DARROW_HDFS=OFF -DARROW_BUILD_BENCHMARKS=OFF -DARROW_JEMALLOC=OFF ..
  - cmake --build . --config Debug
 
 # test_script:

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/ci/travis_before_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 73bdaeb..94a889c 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -17,6 +17,11 @@ set -ex
 
 : ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}
 
+if [ "$TRAVIS_OS_NAME" = "osx" ]; then  # quoted so an empty/unset value cannot break the test
+  brew update > /dev/null
+  brew install jemalloc  # Linux builds get libjemalloc-dev from the apt addon in .travis.yml
+fi
+
 mkdir $CPP_BUILD_DIR
 pushd $CPP_BUILD_DIR
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 13f0354..419691b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -28,7 +28,7 @@ set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
 
 set(GFLAGS_VERSION "2.1.2")
 set(GTEST_VERSION "1.7.0")
-set(GBENCHMARK_VERSION "1.0.0")
+set(GBENCHMARK_VERSION "1.1.0")
 set(FLATBUFFERS_VERSION "1.3.0")
 
 find_package(ClangTools)
@@ -74,6 +74,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
     "Build the Arrow IPC extensions"
     ON)
 
+  option(ARROW_JEMALLOC
+    "Build the Arrow jemalloc-based allocator"
+    ON)
+
   option(ARROW_BOOST_USE_SHARED
     "Rely on boost shared libraries where relevant"
     ON)
@@ -238,6 +242,16 @@ function(ADD_ARROW_BENCHMARK_DEPENDENCIES REL_BENCHMARK_NAME)
   add_dependencies(${BENCHMARK_NAME} ${ARGN})
 endfunction()
 
+# A wrapper for target_link_libraries() that is compatible with NO_BENCHMARKS.
+function(ARROW_BENCHMARK_LINK_LIBRARIES REL_BENCHMARK_NAME)
+  if(NO_BENCHMARKS)  # no benchmark target exists in that case, so there is nothing to link
+    return()
+  endif()
+  get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
+  # Every argument after the benchmark name is forwarded as a library to link.
+  target_link_libraries(${BENCHMARK_NAME} ${ARGN})
+endfunction()
+
 
 ############################################################
 # Testing
@@ -526,7 +540,11 @@ if(ARROW_BUILD_BENCHMARKS)
     set(GBENCHMARK_CMAKE_ARGS
           "-DCMAKE_BUILD_TYPE=Release"
           "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
+          "-DBENCHMARK_ENABLE_TESTING=OFF"
           "-DCMAKE_CXX_FLAGS=-fPIC ${GBENCHMARK_CMAKE_CXX_FLAGS}")
+    if (APPLE)
+      set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
+    endif()
     if (CMAKE_VERSION VERSION_GREATER "3.2")
       # BUILD_BYPRODUCTS is a 3.2+ feature
       ExternalProject_Add(gbenchmark_ep
@@ -575,6 +593,12 @@ endif()
 message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
 include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
 
+if (ARROW_JEMALLOC)
+  find_package(jemalloc REQUIRED)
+  ADD_THIRDPARTY_LIB(jemalloc
+      SHARED_LIB ${JEMALLOC_SHARED_LIB})
+endif()
+
 ## Google PerfTools
 ##
 ## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
@@ -737,6 +761,10 @@ add_subdirectory(src/arrow)
 add_subdirectory(src/arrow/io)
 add_subdirectory(src/arrow/util)
 
+if(ARROW_JEMALLOC)
+  add_subdirectory(src/arrow/jemalloc)
+endif()
+
 #----------------------------------------------------------------------
 # IPC library
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/README.md
----------------------------------------------------------------------
diff --git a/cpp/README.md b/cpp/README.md
index 190e6f8..b77ea99 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -60,6 +60,7 @@ variables
 * Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks)
 * Flatbuffers: `FLATBUFFERS_HOME` (only required for the IPC extensions)
 * Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions)
+* jemalloc: `JEMALLOC_HOME` (only required for the jemalloc-based memory pool)
 
 ## Continuous Integration
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/cmake_modules/Findjemalloc.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/Findjemalloc.cmake b/cpp/cmake_modules/Findjemalloc.cmake
new file mode 100644
index 0000000..e7fbb94
--- /dev/null
+++ b/cpp/cmake_modules/Findjemalloc.cmake
@@ -0,0 +1,86 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find jemalloc headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(jemalloc)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  JEMALLOC_HOME -
+#   When set, this path is inspected instead of standard library locations as
+#   the root of the jemalloc installation.  The environment variable
+#   JEMALLOC_HOME overrides this variable.
+#
+# This module defines
+#  JEMALLOC_INCLUDE_DIR, directory containing headers
+#  JEMALLOC_SHARED_LIB, path to libjemalloc.so/dylib
+#  JEMALLOC_FOUND, whether jemalloc has been found
+
+if( NOT "$ENV{JEMALLOC_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "$ENV{JEMALLOC_HOME}" _native_path )
+    list( APPEND _jemalloc_roots ${_native_path} )
+elseif ( JEMALLOC_HOME )
+    list( APPEND _jemalloc_roots ${JEMALLOC_HOME} )
+endif()
+
+set(LIBJEMALLOC_NAMES jemalloc libjemalloc.so.1 libjemalloc.so.2 libjemalloc.dylib)
+
+# Try the parameterized roots, if they exist
+if ( _jemalloc_roots )
+    find_path( JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h
+        PATHS ${_jemalloc_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "include" )
+    find_library( JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES}
+        PATHS ${_jemalloc_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib" )
+else ()
+    find_path( JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h )
+    message(STATUS ${JEMALLOC_INCLUDE_DIR})
+    find_library( JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES})
+    message(STATUS ${JEMALLOC_SHARED_LIB})
+endif ()
+
+if (JEMALLOC_INCLUDE_DIR AND JEMALLOC_SHARED_LIB)
+  set(JEMALLOC_FOUND TRUE)
+else ()
+  set(JEMALLOC_FOUND FALSE)
+endif ()
+
+if (JEMALLOC_FOUND)
+  if (NOT jemalloc_FIND_QUIETLY)
+    message(STATUS "Found the jemalloc library: ${JEMALLOC_SHARED_LIB}")
+  endif ()
+else ()  # Not found: report where the search looked; abort only if REQUIRED.
+  if (NOT jemalloc_FIND_QUIETLY)
+    set(JEMALLOC_ERR_MSG "Could not find the jemalloc library. Looked in")
+    if ( _jemalloc_roots )
+      set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} ${_jemalloc_roots}.")
+    else ()
+      set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} system search paths.")
+    endif ()
+    if (jemalloc_FIND_REQUIRED)
+      message(FATAL_ERROR "${JEMALLOC_ERR_MSG}")
+    else (jemalloc_FIND_REQUIRED)
+      message(STATUS "${JEMALLOC_ERR_MSG}")
+    endif (jemalloc_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  JEMALLOC_INCLUDE_DIR
+  JEMALLOC_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index f8c5051..16668db 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -59,4 +59,5 @@ ADD_ARROW_TEST(schema-test)
 ADD_ARROW_TEST(status-test)
 ADD_ARROW_TEST(table-test)
 
+ADD_ARROW_BENCHMARK(builder-benchmark)
 ADD_ARROW_BENCHMARK(column-benchmark)

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 6ffa03a..6d55f88 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -80,13 +80,11 @@ Status PoolBuffer::Reserve(int64_t new_capacity) {
     uint8_t* new_data;
     new_capacity = BitUtil::RoundUpToMultipleOf64(new_capacity);
     if (mutable_data_) {
-      RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
-      memcpy(new_data, mutable_data_, size_);
-      pool_->Free(mutable_data_, capacity_);
+      RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_));
     } else {
       RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
+      mutable_data_ = new_data;
     }
-    mutable_data_ = new_data;
     data_ = mutable_data_;
     capacity_ = new_capacity;
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/builder-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc
new file mode 100644
index 0000000..67799a3
--- /dev/null
+++ b/cpp/src/arrow/builder-benchmark.cc
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/builder.h"
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+constexpr int64_t kFinalSize = 256;
+
+static void BM_BuildPrimitiveArrayNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  // 2 MiB block
+  std::vector<int64_t> data(256 * 1024, 100);
+  while (state.KeepRunning()) {
+    Int64Builder builder(default_memory_pool(), arrow::int64());
+    for (int i = 0; i < kFinalSize; i++) {
+      // Build up an array of 512 MiB in size
+      builder.Append(data.data(), data.size(), nullptr);
+    }
+    std::shared_ptr<Array> out;
+    builder.Finish(&out);
+  }
+  state.SetBytesProcessed(
+      state.iterations() * data.size() * sizeof(int64_t) * kFinalSize);
+}
+
+BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+
+static void BM_BuildVectorNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  // 2 MiB block
+  std::vector<int64_t> data(256 * 1024, 100);
+  while (state.KeepRunning()) {
+    std::vector<int64_t> builder;
+    for (int i = 0; i < kFinalSize; i++) {
+      // Build up an array of 512 MiB in size
+      builder.insert(builder.end(), data.cbegin(), data.cend());
+    }
+  }
+  state.SetBytesProcessed(
+      state.iterations() * data.size() * sizeof(int64_t) * kFinalSize);
+}
+
+BENCHMARK(BM_BuildVectorNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 1d94dba..a308ea5 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -156,6 +156,7 @@ Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
     const int64_t new_bytes = TypeTraits<T>::bytes_required(capacity);
     RETURN_NOT_OK(data_->Resize(new_bytes));
     raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
+    // TODO(emkornfield) valgrind complains without this
     memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
   }
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc
index 650ec90..8a1c775 100644
--- a/cpp/src/arrow/column-benchmark.cc
+++ b/cpp/src/arrow/column-benchmark.cc
@@ -37,7 +37,7 @@ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
 static void BM_BuildInt32ColumnByChunk(
     benchmark::State& state) {  // NOLINT non-const reference
   ArrayVector arrays;
-  for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
+  for (int chunk_n = 0; chunk_n < state.range(0); ++chunk_n) {
     arrays.push_back(MakePrimitive<Int32Array>(100, 10));
   }
   const auto INT32 = std::make_shared<Int32Type>();

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/io/interfaces.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index 23bef28..8040f93 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -45,8 +45,8 @@ Status ReadableFileInterface::ReadAt(
 }
 
 Status Writeable::Write(const std::string& data) {
-  return Write(reinterpret_cast<const uint8_t*>(data.c_str()),
-      static_cast<int64_t>(data.size()));
+  return Write(
+      reinterpret_cast<const uint8_t*>(data.c_str()), static_cast<int64_t>(data.size()));
 }
 
 }  // namespace io

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/io/io-file-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/io-file-test.cc
index 5f5d639..378b60e 100644
--- a/cpp/src/arrow/io/io-file-test.cc
+++ b/cpp/src/arrow/io/io-file-test.cc
@@ -292,6 +292,19 @@ class MyMemoryPool : public MemoryPool {
 
   void Free(uint8_t* buffer, int64_t size) override { std::free(buffer); }
 
+  Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
+    // std::realloc leaves the old block valid on failure; keep *ptr pointing at it
+    // so the caller can still free it instead of leaking the allocation.
+    void* resized = std::realloc(*ptr, new_size);
+    if (resized == NULL) {
+      std::stringstream ss;
+      ss << "realloc of size " << new_size << " failed";
+      return Status::OutOfMemory(ss.str());
+    }
+    *ptr = reinterpret_cast<uint8_t*>(resized);
+    return Status::OK();
+  }
+
   int64_t bytes_allocated() const override { return -1; }
 
   int64_t num_allocations() const { return num_allocations_; }

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/CMakeLists.txt b/cpp/src/arrow/jemalloc/CMakeLists.txt
new file mode 100644
index 0000000..c6663eb
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# ----------------------------------------------------------------------
+# arrow_jemalloc : Arrow jemalloc-based allocator
+
+include_directories(SYSTEM "${JEMALLOC_INCLUDE_DIR}")  # set by Findjemalloc.cmake
+
+# arrow_jemalloc library
+set(ARROW_JEMALLOC_STATIC_LINK_LIBS
+  arrow_static
+  jemalloc
+)
+set(ARROW_JEMALLOC_SHARED_LINK_LIBS
+  arrow_shared
+  jemalloc
+)
+
+if (ARROW_BUILD_STATIC)  # choose the arrow_jemalloc flavour the test/benchmark link against
+  set(ARROW_JEMALLOC_TEST_LINK_LIBS
+    arrow_jemalloc_static)
+else()  # CMake variable names are case-sensitive: spell it exactly as it is read below
+  set(ARROW_JEMALLOC_TEST_LINK_LIBS
+    arrow_jemalloc_shared)
+endif()
+
+set(ARROW_JEMALLOC_SRCS
+  memory_pool.cc
+)
+
+if(NOT APPLE)
+  # Localize thirdparty symbols using a linker version script. This hides them
+  # from the client application. The OS X linker does not support the
+  # version-script option.
+  set(ARROW_JEMALLOC_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map")
+endif()
+
+ADD_ARROW_LIB(arrow_jemalloc
+  SOURCES ${ARROW_JEMALLOC_SRCS}
+  SHARED_LINK_FLAGS ${ARROW_JEMALLOC_LINK_FLAGS}
+  SHARED_LINK_LIBS ${ARROW_JEMALLOC_SHARED_LINK_LIBS}
+  SHARED_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_SHARED_PRIVATE_LINK_LIBS}
+  STATIC_LINK_LIBS ${ARROW_JEMALLOC_STATIC_LINK_LIBS}
+  STATIC_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_STATIC_PRIVATE_LINK_LIBS}
+)
+
+ADD_ARROW_TEST(jemalloc-memory_pool-test)
+ARROW_TEST_LINK_LIBRARIES(jemalloc-memory_pool-test
+  ${ARROW_JEMALLOC_TEST_LINK_LIBS})
+
+ADD_ARROW_BENCHMARK(jemalloc-builder-benchmark)
+ARROW_BENCHMARK_LINK_LIBRARIES(jemalloc-builder-benchmark
+  ${ARROW_JEMALLOC_TEST_LINK_LIBS})
+
+# Headers: top level
+install(FILES
+  memory_pool.h
+  DESTINATION include/arrow/jemalloc)
+
+# pkg-config support
+configure_file(arrow-jemalloc.pc.in
+  "${CMAKE_CURRENT_BINARY_DIR}/arrow-jemalloc.pc"
+  @ONLY)
+install(
+  FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-jemalloc.pc"
+  DESTINATION "lib/pkgconfig/")

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in b/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in
new file mode 100644
index 0000000..0b300fe
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+Name: Apache Arrow jemalloc-based allocator
+Description: jemalloc allocator for Arrow.
+Version: @ARROW_VERSION@
+Libs: -L${libdir} -larrow_jemalloc
+Cflags: -I${includedir}
+Requires: arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
new file mode 100644
index 0000000..58dbaa3
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/builder.h"
+#include "arrow/jemalloc/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+constexpr int64_t kFinalSize = 256;
+
+static void BM_BuildPrimitiveArrayNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  // 2 MiB block
+  std::vector<int64_t> data(256 * 1024, 100);
+  while (state.KeepRunning()) {
+    Int64Builder builder(jemalloc::MemoryPool::default_pool(), arrow::int64());
+    for (int i = 0; i < kFinalSize; i++) {
+      // Build up an array of 512 MiB in size
+      builder.Append(data.data(), data.size(), nullptr);
+    }
+    std::shared_ptr<Array> out;
+    builder.Finish(&out);
+  }
+  state.SetBytesProcessed(
+      state.iterations() * data.size() * sizeof(int64_t) * kFinalSize);
+}
+
+BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc b/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc
new file mode 100644
index 0000000..a8448ab
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <limits>
+
+#include "gtest/gtest.h"
+
+#include "arrow/jemalloc/memory_pool.h"
+#include "arrow/memory_pool-test.h"
+
+namespace arrow {
+namespace jemalloc {
+namespace test {
+
+class TestJemallocMemoryPool : public ::arrow::test::TestMemoryPoolBase {
+ public:
+  ::arrow::MemoryPool* memory_pool() override {
+    return ::arrow::jemalloc::MemoryPool::default_pool();
+  }
+};
+
+TEST_F(TestJemallocMemoryPool, MemoryTracking) {
+  this->TestMemoryTracking();
+}
+
+TEST_F(TestJemallocMemoryPool, OOM) {
+  this->TestOOM();
+}
+
+TEST_F(TestJemallocMemoryPool, Reallocate) {
+  this->TestReallocate();
+}
+
+}  // namespace test
+}  // namespace jemalloc
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/memory_pool.cc b/cpp/src/arrow/jemalloc/memory_pool.cc
new file mode 100644
index 0000000..acc09c7
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/memory_pool.cc
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/jemalloc/memory_pool.h"
+
+#include <sstream>
+
+#include <jemalloc/jemalloc.h>
+
+#include "arrow/status.h"
+
+constexpr size_t kAlignment = 64;
+
+namespace arrow {
+namespace jemalloc {
+
+MemoryPool* MemoryPool::default_pool() {
+  static MemoryPool pool;
+  return &pool;
+}
+
+MemoryPool::MemoryPool() : allocated_size_(0) {}
+
+MemoryPool::~MemoryPool() {}
+
+Status MemoryPool::Allocate(int64_t size, uint8_t** out) {
+  *out = reinterpret_cast<uint8_t*>(mallocx(size, MALLOCX_ALIGN(kAlignment)));
+  if (*out == NULL) {
+    std::stringstream ss;
+    ss << "malloc of size " << size << " failed";
+    return Status::OutOfMemory(ss.str());
+  }
+  allocated_size_ += size;
+  return Status::OK();
+}
+
+Status MemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+  // On failure rallocx() keeps the old block valid, so *ptr is only updated on success.
+  void* resized = rallocx(*ptr, new_size, MALLOCX_ALIGN(kAlignment));
+  if (resized == NULL) {
+    std::stringstream ss;
+    ss << "realloc of size " << new_size << " failed";
+    return Status::OutOfMemory(ss.str());
+  }
+  *ptr = reinterpret_cast<uint8_t*>(resized);
+  allocated_size_ += new_size - old_size;  // account for the net change in pool usage
+  return Status::OK();
+}
+
+void MemoryPool::Free(uint8_t* buffer, int64_t size) {
+  allocated_size_ -= size;  // keep the pool's byte counter in sync with the release
+  if (buffer != NULL) { dallocx(buffer, MALLOCX_ALIGN(kAlignment)); }  // pairs w/ mallocx
+}
+
+int64_t MemoryPool::bytes_allocated() const {
+  return allocated_size_.load();
+}
+
+}  // namespace jemalloc
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/memory_pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/memory_pool.h b/cpp/src/arrow/jemalloc/memory_pool.h
new file mode 100644
index 0000000..0d32b46
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/memory_pool.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for the jemalloc-based allocator
+
+#ifndef ARROW_JEMALLOC_MEMORY_POOL_H
+#define ARROW_JEMALLOC_MEMORY_POOL_H
+
+#include "arrow/memory_pool.h"
+
+#include <atomic>
+
+namespace arrow {
+
+class Status;
+
+namespace jemalloc {
+
+// jemalloc-based implementation of the ::arrow::MemoryPool interface.
+//
+// The constructor is private and copying is disabled; clients obtain the
+// pool through default_pool().
+class ARROW_EXPORT MemoryPool : public ::arrow::MemoryPool {
+ public:
+  // Accessor for the pool instance.
+  static MemoryPool* default_pool();
+
+  // Not copyable.
+  MemoryPool(const MemoryPool&) = delete;
+  MemoryPool& operator=(const MemoryPool&) = delete;
+
+  ~MemoryPool() override;
+
+  // ::arrow::MemoryPool interface.
+  Status Allocate(int64_t size, uint8_t** out) override;
+  Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
+  void Free(uint8_t* buffer, int64_t size) override;
+  int64_t bytes_allocated() const override;
+
+ private:
+  MemoryPool();
+
+  // Byte counter reported by bytes_allocated().
+  std::atomic<int64_t> allocated_size_;
+};
+
+}  // namespace jemalloc
+}  // namespace arrow
+
+#endif  // ARROW_JEMALLOC_MEMORY_POOL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/symbols.map
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/symbols.map b/cpp/src/arrow/jemalloc/symbols.map
new file mode 100644
index 0000000..1e87cae
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/symbols.map
@@ -0,0 +1,30 @@
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+
+{
+  # Symbols marked as 'local' are not exported by the DSO and thus may not
+  # be used by client applications.
+  local:
+    # devtoolset / static-libstdc++ symbols
+    __cxa_*;
+
+    extern "C++" {
+      # boost
+      boost::*;
+
+      # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically
+      # links c++11 symbols into binaries so that the result may be executed on
+      # a system with an older libstdc++ which doesn't include the necessary
+      # c++11 symbols.
+      std::*;
+    };
+};

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool-test.cc b/cpp/src/arrow/memory_pool-test.cc
index d6f323d..3daf727 100644
--- a/cpp/src/arrow/memory_pool-test.cc
+++ b/cpp/src/arrow/memory_pool-test.cc
@@ -15,35 +15,28 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "arrow/memory_pool-test.h"
+
 #include <cstdint>
 #include <limits>
 
-#include "gtest/gtest.h"
-
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/test-util.h"
-
 namespace arrow {
 
-TEST(DefaultMemoryPool, MemoryTracking) {
-  MemoryPool* pool = default_memory_pool();
+// Fixture that runs the shared pool tests against ::arrow::default_memory_pool().
+class TestDefaultMemoryPool : public ::arrow::test::TestMemoryPoolBase {
+ public:
+  ::arrow::MemoryPool* memory_pool() override {
+    ::arrow::MemoryPool* const pool = ::arrow::default_memory_pool();
+    return pool;
+  }
+};
 
-  uint8_t* data;
-  ASSERT_OK(pool->Allocate(100, &data));
-  EXPECT_EQ(static_cast<uint64_t>(0), reinterpret_cast<uint64_t>(data) % 64);
-  ASSERT_EQ(100, pool->bytes_allocated());
-
-  pool->Free(data, 100);
-  ASSERT_EQ(0, pool->bytes_allocated());
+// Delegates to the shared fixture helper.
+TEST_F(TestDefaultMemoryPool, MemoryTracking) {
+  TestMemoryTracking();
 }
 
-TEST(DefaultMemoryPool, OOM) {
-  MemoryPool* pool = default_memory_pool();
+// Delegates to the shared fixture helper.
+TEST_F(TestDefaultMemoryPool, OOM) {
+  TestOOM();
+}
 
-  uint8_t* data;
-  int64_t to_alloc = std::numeric_limits<int64_t>::max();
-  ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
+// Delegates to the shared fixture helper.
+TEST_F(TestDefaultMemoryPool, Reallocate) {
+  TestReallocate();
 }
 
 // Death tests and valgrind are known to not play well 100% of the time. See

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool-test.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool-test.h b/cpp/src/arrow/memory_pool-test.h
new file mode 100644
index 0000000..b9f0337
--- /dev/null
+++ b/cpp/src/arrow/memory_pool-test.h
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include <limits>
+
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+namespace test {
+
+// Shared test logic for MemoryPool implementations. A concrete fixture
+// overrides memory_pool() to supply the pool under test and calls the
+// Test* helpers from its TEST_F bodies.
+class TestMemoryPoolBase : public ::testing::Test {
+ public:
+  // Pool exercised by the helpers below.
+  virtual ::arrow::MemoryPool* memory_pool() = 0;
+
+  // Allocates 100 bytes, expects a 64-byte aligned address, and checks that
+  // bytes_allocated() reports 100 after Allocate and 0 after Free.
+  void TestMemoryTracking() {
+    ::arrow::MemoryPool* pool = memory_pool();
+    uint8_t* data;
+
+    ASSERT_OK(pool->Allocate(100, &data));
+    EXPECT_EQ(static_cast<uint64_t>(0), reinterpret_cast<uint64_t>(data) % 64);
+    ASSERT_EQ(100, pool->bytes_allocated());
+
+    pool->Free(data, 100);
+    ASSERT_EQ(0, pool->bytes_allocated());
+  }
+
+  // Requests the largest int64_t size and expects an OutOfMemory status.
+  void TestOOM() {
+    ::arrow::MemoryPool* pool = memory_pool();
+    uint8_t* data;
+
+    int64_t to_alloc = std::numeric_limits<int64_t>::max();
+    ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
+  }
+
+  // Grows a 10-byte allocation to 20 bytes and shrinks it to 5, checking that
+  // marker bytes survive and that bytes_allocated() follows each step.
+  void TestReallocate() {
+    ::arrow::MemoryPool* pool = memory_pool();
+    uint8_t* data;
+
+    ASSERT_OK(pool->Allocate(10, &data));
+    ASSERT_EQ(10, pool->bytes_allocated());
+    // Markers at both ends of the initial 10-byte region.
+    data[0] = 35;
+    data[9] = 12;
+
+    // Expand: the last original byte must be preserved.
+    ASSERT_OK(pool->Reallocate(10, 20, &data));
+    ASSERT_EQ(data[9], 12);
+    ASSERT_EQ(20, pool->bytes_allocated());
+
+    // Shrink: the first byte must be preserved.
+    ASSERT_OK(pool->Reallocate(20, 5, &data));
+    ASSERT_EQ(data[0], 35);
+    ASSERT_EQ(5, pool->bytes_allocated());
+
+    // Free: the counter returns to zero.
+    pool->Free(data, 5);
+    ASSERT_EQ(0, pool->bytes_allocated());
+  }
+};
+
+}  // namespace test
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index f55b1ac..aea5e21 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/memory_pool.h"
 
+#include <algorithm>
 #include <cstdlib>
 #include <mutex>
 #include <sstream>
@@ -67,6 +68,7 @@ class InternalMemoryPool : public MemoryPool {
   virtual ~InternalMemoryPool();
 
   Status Allocate(int64_t size, uint8_t** out) override;
+  Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
 
   void Free(uint8_t* buffer, int64_t size) override;
 
@@ -85,6 +87,28 @@ Status InternalMemoryPool::Allocate(int64_t size, uint8_t** out) {
   return Status::OK();
 }
 
+// Resizes *ptr from old_size to new_size bytes by allocating a new aligned
+// chunk, copying min(old_size, new_size) bytes into it and freeing the old
+// chunk. The whole operation runs with pool_lock_ held. If the new allocation
+// fails, its status is returned and *ptr is left unchanged.
+Status InternalMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+  std::lock_guard<std::mutex> guard(pool_lock_);
+
+  // realloc() is not usable here because it gives no alignment guarantee.
+  uint8_t* replacement;
+  RETURN_NOT_OK(AllocateAligned(new_size, &replacement));
+
+  // Carry over only the bytes that exist in both the old and the new chunk.
+  const int64_t bytes_to_copy = std::min(new_size, old_size);
+  uint8_t* const previous = *ptr;
+  memcpy(replacement, previous, bytes_to_copy);
+  *ptr = replacement;
+
+  // MSVC builds release the old chunk with _aligned_free, all others with
+  // std::free.
+#ifdef _MSC_VER
+  _aligned_free(previous);
+#else
+  std::free(previous);
+#endif
+
+  bytes_allocated_ += new_size - old_size;
+  return Status::OK();
+}
+
+
 int64_t InternalMemoryPool::bytes_allocated() const {
   std::lock_guard<std::mutex> guard(pool_lock_);
   return bytes_allocated_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index 4c1d699..13a3f12 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -31,6 +31,7 @@ class ARROW_EXPORT MemoryPool {
   virtual ~MemoryPool();
 
   virtual Status Allocate(int64_t size, uint8_t** out) = 0;
+  virtual Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) = 0;
   virtual void Free(uint8_t* buffer, int64_t size) = 0;
 
   virtual int64_t bytes_allocated() const = 0;

http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/python/src/pyarrow/common.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/common.cc b/python/src/pyarrow/common.cc
index 8660ac8..0bdd289 100644
--- a/python/src/pyarrow/common.cc
+++ b/python/src/pyarrow/common.cc
@@ -47,6 +47,20 @@ class PyArrowMemoryPool : public arrow::MemoryPool {
     return Status::OK();
   }
 
+  // Resizes the allocation at *ptr from old_size to new_size bytes with
+  // std::realloc and adjusts the tracked byte count by new_size - old_size.
+  //
+  // On failure Status::OutOfMemory is returned and *ptr keeps pointing at the
+  // original allocation, which is still valid and owned by the caller.
+  Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
+    // bytes_allocated() reads bytes_allocated_ under pool_lock_, so the update
+    // below takes the same lock.
+    std::lock_guard<std::mutex> guard(pool_lock_);
+
+    // realloc(p, 0) is implementation-defined (it may free p and return
+    // nullptr), so request one byte instead of zero.
+    const size_t request = (new_size == 0) ? 1 : static_cast<size_t>(new_size);
+
+    // A failed realloc() does not free the original block; only overwrite *ptr
+    // after success so the block is not leaked.
+    void* resized = std::realloc(*ptr, request);
+    if (resized == nullptr) {
+      std::stringstream ss;
+      ss << "realloc of size " << new_size << " failed";
+      return Status::OutOfMemory(ss.str());
+    }
+    *ptr = static_cast<uint8_t*>(resized);
+
+    bytes_allocated_ += new_size - old_size;
+
+    return Status::OK();
+  }
+
   int64_t bytes_allocated() const override {
     std::lock_guard<std::mutex> guard(pool_lock_);
     return bytes_allocated_;