You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/01/06 14:57:38 UTC
arrow git commit: ARROW-456: Add jemalloc based MemoryPool
Repository: arrow
Updated Branches:
refs/heads/master 320f5875e -> 5bf6ae49e
ARROW-456: Add jemalloc based MemoryPool
Runtimes of the `builder-benchmark`:
```
BM_BuildPrimitiveArrayNoNulls/repeats:3 901 ms 889 ms 1 576.196MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3 833 ms 829 ms 1 617.6MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3 825 ms 821 ms 1 623.855MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_mean 853 ms 846 ms 1 605.884MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev 34 ms 30 ms 0 21.147MB/s
BM_BuildVectorNoNulls/repeats:3 712 ms 701 ms 1 729.866MB/s
BM_BuildVectorNoNulls/repeats:3 671 ms 670 ms 1 764.464MB/s
BM_BuildVectorNoNulls/repeats:3 688 ms 681 ms 1 751.285MB/s
BM_BuildVectorNoNulls/repeats:3_mean 690 ms 684 ms 1 748.538MB/s
BM_BuildVectorNoNulls/repeats:3_stddev 17 ms 13 ms 0 14.2578MB/s
```
With an aligned `Reallocate`, the jemalloc version is 50% faster and even outperforms `std::vector`:
```
BM_BuildPrimitiveArrayNoNulls/repeats:3 565 ms 559 ms 1 916.516MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3 540 ms 537 ms 1 952.727MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3 544 ms 543 ms 1 942.948MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_mean 550 ms 546 ms 1 937.397MB/s
BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev 11 ms 9 ms 0 15.2949MB/s
```
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #270 from xhochy/ARROW-456 and squashes the following commits:
d3ce3bf [Uwe L. Korn] Zero arrays for now
831399d [Uwe L. Korn] cpplint #2
e6e251b [Uwe L. Korn] cpplint
52b3c76 [Uwe L. Korn] Add Reallocate implementation to PyArrowMemoryPool
113e650 [Uwe L. Korn] Add missing file
d331cd9 [Uwe L. Korn] Add tests for Reallocate
c2be086 [Uwe L. Korn] Add JEMALLOC_HOME to the Readme
bd47f51 [Uwe L. Korn] Add missing return value
5142ac3 [Uwe L. Korn] Don't use deprecated GBenchmark interfaces
b6bff98 [Uwe L. Korn] Add missing (win) include
6f08e19 [Uwe L. Korn] Don't build jemalloc on AppVeyor
834c3b2 [Uwe L. Korn] Add jemalloc to Travis builds
10c6839 [Uwe L. Korn] Implement Reallocate function
a17b313 [Uwe L. Korn] ARROW-456: C++: Add jemalloc based MemoryPool
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5bf6ae49
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5bf6ae49
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5bf6ae49
Branch: refs/heads/master
Commit: 5bf6ae49ec561eaaef823f0eb16ccca2d2ba7cf3
Parents: 320f587
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Jan 6 15:57:20 2017 +0100
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Fri Jan 6 15:57:20 2017 +0100
----------------------------------------------------------------------
.travis.yml | 1 +
appveyor.yml | 2 +-
ci/travis_before_script_cpp.sh | 5 ++
cpp/CMakeLists.txt | 30 ++++++-
cpp/README.md | 1 +
cpp/cmake_modules/Findjemalloc.cmake | 86 ++++++++++++++++++++
cpp/src/arrow/CMakeLists.txt | 1 +
cpp/src/arrow/buffer.cc | 6 +-
cpp/src/arrow/builder-benchmark.cc | 64 +++++++++++++++
cpp/src/arrow/builder.cc | 1 +
cpp/src/arrow/column-benchmark.cc | 2 +-
cpp/src/arrow/io/interfaces.cc | 4 +-
cpp/src/arrow/io/io-file-test.cc | 13 +++
cpp/src/arrow/jemalloc/CMakeLists.txt | 80 ++++++++++++++++++
cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in | 27 ++++++
.../jemalloc/jemalloc-builder-benchmark.cc | 47 +++++++++++
.../arrow/jemalloc/jemalloc-memory_pool-test.cc | 51 ++++++++++++
cpp/src/arrow/jemalloc/memory_pool.cc | 74 +++++++++++++++++
cpp/src/arrow/jemalloc/memory_pool.h | 57 +++++++++++++
cpp/src/arrow/jemalloc/symbols.map | 30 +++++++
cpp/src/arrow/memory_pool-test.cc | 33 +++-----
cpp/src/arrow/memory_pool-test.h | 79 ++++++++++++++++++
cpp/src/arrow/memory_pool.cc | 24 ++++++
cpp/src/arrow/memory_pool.h | 1 +
python/src/pyarrow/common.cc | 14 ++++
25 files changed, 704 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 1634eba..e8d9104 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,7 @@ addons:
- libboost-dev
- libboost-filesystem-dev
- libboost-system-dev
+ - libjemalloc-dev
matrix:
fast_finish: true
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/appveyor.yml
----------------------------------------------------------------------
diff --git a/appveyor.yml b/appveyor.yml
index 6747848..17362c9 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -32,7 +32,7 @@ build_script:
- cd build
# A lot of features are still deactivated as they do not build on Windows
# * gbenchmark doesn't build with MSVC
- - cmake -G "%GENERATOR%" -DARROW_BOOST_USE_SHARED=OFF -DARROW_IPC=OFF -DARROW_HDFS=OFF -DARROW_BUILD_BENCHMARKS=OFF ..
+ - cmake -G "%GENERATOR%" -DARROW_BOOST_USE_SHARED=OFF -DARROW_IPC=OFF -DARROW_HDFS=OFF -DARROW_BUILD_BENCHMARKS=OFF -DARROW_JEMALLOC=OFF ..
- cmake --build . --config Debug
# test_script:
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/ci/travis_before_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 73bdaeb..94a889c 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -17,6 +17,11 @@ set -ex
: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}
+if [ $TRAVIS_OS_NAME == "osx" ]; then
+ brew update > /dev/null
+ brew install jemalloc
+fi
+
mkdir $CPP_BUILD_DIR
pushd $CPP_BUILD_DIR
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 13f0354..419691b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -28,7 +28,7 @@ set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
set(GFLAGS_VERSION "2.1.2")
set(GTEST_VERSION "1.7.0")
-set(GBENCHMARK_VERSION "1.0.0")
+set(GBENCHMARK_VERSION "1.1.0")
set(FLATBUFFERS_VERSION "1.3.0")
find_package(ClangTools)
@@ -74,6 +74,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Build the Arrow IPC extensions"
ON)
+ option(ARROW_JEMALLOC
+ "Build the Arrow jemalloc-based allocator"
+ ON)
+
option(ARROW_BOOST_USE_SHARED
"Rely on boost shared libraries where relevant"
ON)
@@ -238,6 +242,16 @@ function(ADD_ARROW_BENCHMARK_DEPENDENCIES REL_BENCHMARK_NAME)
add_dependencies(${BENCHMARK_NAME} ${ARGN})
endfunction()
+# A wrapper for target_link_libraries() that is compatible with NO_BENCHMARKS.
+function(ARROW_BENCHMARK_LINK_LIBRARIES REL_BENCHMARK_NAME)
+  # Nothing to link when benchmarks are disabled. The guard has to look at
+  # NO_BENCHMARKS: checking the test switch would try to link a benchmark
+  # target that was never created when tests are on but benchmarks are off.
+  if(NO_BENCHMARKS)
+    return()
+  endif()
+  # The target name is the benchmark's file name without directory/extension.
+  get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
+
+  # Remaining arguments are the libraries to link against.
+  target_link_libraries(${BENCHMARK_NAME} ${ARGN})
+endfunction()
+
############################################################
# Testing
@@ -526,7 +540,11 @@ if(ARROW_BUILD_BENCHMARKS)
set(GBENCHMARK_CMAKE_ARGS
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
+ "-DBENCHMARK_ENABLE_TESTING=OFF"
"-DCMAKE_CXX_FLAGS=-fPIC ${GBENCHMARK_CMAKE_CXX_FLAGS}")
+ if (APPLE)
+ set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
+ endif()
if (CMAKE_VERSION VERSION_GREATER "3.2")
# BUILD_BYPRODUCTS is a 3.2+ feature
ExternalProject_Add(gbenchmark_ep
@@ -575,6 +593,12 @@ endif()
message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+if (ARROW_JEMALLOC)
+ find_package(jemalloc REQUIRED)
+ ADD_THIRDPARTY_LIB(jemalloc
+ SHARED_LIB ${JEMALLOC_SHARED_LIB})
+endif()
+
## Google PerfTools
##
## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
@@ -737,6 +761,10 @@ add_subdirectory(src/arrow)
add_subdirectory(src/arrow/io)
add_subdirectory(src/arrow/util)
+if(ARROW_JEMALLOC)
+ add_subdirectory(src/arrow/jemalloc)
+endif()
+
#----------------------------------------------------------------------
# IPC library
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/README.md
----------------------------------------------------------------------
diff --git a/cpp/README.md b/cpp/README.md
index 190e6f8..b77ea99 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -60,6 +60,7 @@ variables
* Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks)
* Flatbuffers: `FLATBUFFERS_HOME` (only required for the IPC extensions)
* Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions)
+* jemalloc: `JEMALLOC_HOME` (only required for the jemalloc-based memory pool)
## Continuous Integration
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/cmake_modules/Findjemalloc.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/Findjemalloc.cmake b/cpp/cmake_modules/Findjemalloc.cmake
new file mode 100644
index 0000000..e7fbb94
--- /dev/null
+++ b/cpp/cmake_modules/Findjemalloc.cmake
@@ -0,0 +1,86 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find jemalloc headers and libraries.
+#
+# Usage of this module as follows:
+#
+# find_package(jemalloc)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+# JEMALLOC_HOME -
+# When set, this path is inspected instead of standard library locations as
+# the root of the jemalloc installation. The environment variable
+# JEMALLOC_HOME overrides this variable.
+#
+# This module defines
+# JEMALLOC_INCLUDE_DIR, directory containing headers
+# JEMALLOC_SHARED_LIB, path to libjemalloc.so/dylib
+# JEMALLOC_FOUND, whether jemalloc has been found
+
+if( NOT "$ENV{JEMALLOC_HOME}" STREQUAL "")
+ file( TO_CMAKE_PATH "$ENV{JEMALLOC_HOME}" _native_path )
+ list( APPEND _jemalloc_roots ${_native_path} )
+elseif ( JEMALLOC_HOME )
+ list( APPEND _jemalloc_roots ${JEMALLOC_HOME} )
+endif()
+
+set(LIBJEMALLOC_NAMES jemalloc libjemalloc.so.1 libjemalloc.so.2 libjemalloc.dylib)
+
+# Try the parameterized roots, if they exist
+if ( _jemalloc_roots )
+ find_path( JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h
+ PATHS ${_jemalloc_roots} NO_DEFAULT_PATH
+ PATH_SUFFIXES "include" )
+ find_library( JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES}
+ PATHS ${_jemalloc_roots} NO_DEFAULT_PATH
+ PATH_SUFFIXES "lib" )
+else ()
+ find_path( JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h )
+ message(STATUS ${JEMALLOC_INCLUDE_DIR})
+ find_library( JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES})
+ message(STATUS ${JEMALLOC_SHARED_LIB})
+endif ()
+
+if (JEMALLOC_INCLUDE_DIR AND JEMALLOC_SHARED_LIB)
+ set(JEMALLOC_FOUND TRUE)
+else ()
+ set(JEMALLOC_FOUND FALSE)
+endif ()
+
+# Report the outcome of the search.
+if (JEMALLOC_FOUND)
+  if (NOT jemalloc_FIND_QUIETLY)
+    # Print the library path that find_library() stored in JEMALLOC_SHARED_LIB.
+    message(STATUS "Found the jemalloc library: ${JEMALLOC_SHARED_LIB}")
+  endif ()
+else ()
+  # Name the locations that were searched: the explicit roots when
+  # JEMALLOC_HOME was given, the default system paths otherwise.
+  if ( _jemalloc_roots )
+    set(JEMALLOC_ERR_MSG
+      "Could not find the jemalloc library. Looked in ${_jemalloc_roots}.")
+  else ()
+    set(JEMALLOC_ERR_MSG
+      "Could not find the jemalloc library. Looked in system search paths.")
+  endif ()
+  if (jemalloc_FIND_REQUIRED)
+    # A REQUIRED package must stop the configure step even when QUIET is set.
+    message(FATAL_ERROR "${JEMALLOC_ERR_MSG}")
+  elseif (NOT jemalloc_FIND_QUIETLY)
+    message(STATUS "${JEMALLOC_ERR_MSG}")
+  endif ()
+endif ()
+
+mark_as_advanced(
+ JEMALLOC_INCLUDE_DIR
+ JEMALLOC_SHARED_LIB
+)
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index f8c5051..16668db 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -59,4 +59,5 @@ ADD_ARROW_TEST(schema-test)
ADD_ARROW_TEST(status-test)
ADD_ARROW_TEST(table-test)
+ADD_ARROW_BENCHMARK(builder-benchmark)
ADD_ARROW_BENCHMARK(column-benchmark)
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 6ffa03a..6d55f88 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -80,13 +80,11 @@ Status PoolBuffer::Reserve(int64_t new_capacity) {
uint8_t* new_data;
new_capacity = BitUtil::RoundUpToMultipleOf64(new_capacity);
if (mutable_data_) {
- RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
- memcpy(new_data, mutable_data_, size_);
- pool_->Free(mutable_data_, capacity_);
+ RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_));
} else {
RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
+ mutable_data_ = new_data;
}
- mutable_data_ = new_data;
data_ = mutable_data_;
capacity_ = new_capacity;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/builder-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc
new file mode 100644
index 0000000..67799a3
--- /dev/null
+++ b/cpp/src/arrow/builder-benchmark.cc
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/builder.h"
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+constexpr int64_t kFinalSize = 256;
+
+// Benchmarks Int64Builder on the default memory pool: each timed iteration
+// appends the same chunk kFinalSize times and then finishes the array.
+static void BM_BuildPrimitiveArrayNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  // 256 * 1024 int64 values make up one 2 MiB chunk.
+  std::vector<int64_t> chunk(256 * 1024, 100);
+  while (state.KeepRunning()) {
+    Int64Builder builder(default_memory_pool(), arrow::int64());
+    // kFinalSize chunks of 2 MiB add up to an array of 512 MiB.
+    for (int round = 0; round < kFinalSize; ++round) {
+      builder.Append(chunk.data(), chunk.size(), nullptr);
+    }
+    std::shared_ptr<Array> result;
+    builder.Finish(&result);
+  }
+  // Bytes appended over all iterations, used for the MB/s column.
+  state.SetBytesProcessed(
+      state.iterations() * chunk.size() * sizeof(int64_t) * kFinalSize);
+}
+
+BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+
+// Baseline for the builder benchmark: grows a plain std::vector<int64_t> by
+// inserting the same chunk kFinalSize times per timed iteration.
+static void BM_BuildVectorNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  // 256 * 1024 int64 values make up one 2 MiB chunk.
+  std::vector<int64_t> chunk(256 * 1024, 100);
+  while (state.KeepRunning()) {
+    std::vector<int64_t> sink;
+    // kFinalSize chunks of 2 MiB add up to 512 MiB of values.
+    for (int round = 0; round < kFinalSize; ++round) {
+      sink.insert(sink.end(), chunk.begin(), chunk.end());
+    }
+  }
+  // Bytes inserted over all iterations, used for the MB/s column.
+  state.SetBytesProcessed(
+      state.iterations() * chunk.size() * sizeof(int64_t) * kFinalSize);
+}
+
+BENCHMARK(BM_BuildVectorNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 1d94dba..a308ea5 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -156,6 +156,7 @@ Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
const int64_t new_bytes = TypeTraits<T>::bytes_required(capacity);
RETURN_NOT_OK(data_->Resize(new_bytes));
raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
+ // TODO(emkornfield) valgrind complains without this
memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
}
return Status::OK();
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc
index 650ec90..8a1c775 100644
--- a/cpp/src/arrow/column-benchmark.cc
+++ b/cpp/src/arrow/column-benchmark.cc
@@ -37,7 +37,7 @@ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
static void BM_BuildInt32ColumnByChunk(
benchmark::State& state) { // NOLINT non-const reference
ArrayVector arrays;
- for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
+ for (int chunk_n = 0; chunk_n < state.range(0); ++chunk_n) {
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
}
const auto INT32 = std::make_shared<Int32Type>();
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/io/interfaces.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index 23bef28..8040f93 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -45,8 +45,8 @@ Status ReadableFileInterface::ReadAt(
}
Status Writeable::Write(const std::string& data) {
- return Write(reinterpret_cast<const uint8_t*>(data.c_str()),
- static_cast<int64_t>(data.size()));
+ return Write(
+ reinterpret_cast<const uint8_t*>(data.c_str()), static_cast<int64_t>(data.size()));
}
} // namespace io
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/io/io-file-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/io-file-test.cc
index 5f5d639..378b60e 100644
--- a/cpp/src/arrow/io/io-file-test.cc
+++ b/cpp/src/arrow/io/io-file-test.cc
@@ -292,6 +292,19 @@ class MyMemoryPool : public MemoryPool {
void Free(uint8_t* buffer, int64_t size) override { std::free(buffer); }
+  // Resizes the block at *ptr to new_size bytes with std::realloc; old_size
+  // is not needed by this pool. Returns Status::OutOfMemory when realloc
+  // fails, in which case *ptr still refers to the original block so the
+  // caller can keep using or free it. Assumes new_size > 0 (realloc with a
+  // size of 0 is implementation-defined).
+  Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
+    // Hold the result in a temporary: storing a null directly into *ptr
+    // would drop the only reference to the still-allocated original block.
+    uint8_t* resized = reinterpret_cast<uint8_t*>(std::realloc(*ptr, new_size));
+
+    if (resized == nullptr) {
+      std::stringstream ss;
+      ss << "realloc of size " << new_size << " failed";
+      return Status::OutOfMemory(ss.str());
+    }
+
+    *ptr = resized;
+    return Status::OK();
+  }
+
int64_t bytes_allocated() const override { return -1; }
int64_t num_allocations() const { return num_allocations_; }
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/CMakeLists.txt b/cpp/src/arrow/jemalloc/CMakeLists.txt
new file mode 100644
index 0000000..c6663eb
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# ----------------------------------------------------------------------
+# arrow_jemalloc : Arrow jemalloc-based allocator
+
+# Add the jemalloc headers located by find_package(jemalloc). The variable
+# must be dereferenced with `$`; without it a literal path is added.
+include_directories(SYSTEM "${JEMALLOC_INCLUDE_DIR}")
+
+# arrow_jemalloc library
+set(ARROW_JEMALLOC_STATIC_LINK_LIBS
+ arrow_static
+ jemalloc
+)
+set(ARROW_JEMALLOC_SHARED_LINK_LIBS
+ arrow_shared
+ jemalloc
+)
+
+# Library that the jemalloc test and benchmark link against: the static
+# variant when static libraries are built, the shared one otherwise.
+# CMake variable names are case-sensitive, so both branches must set the
+# exact name that is read further down (ARROW_JEMALLOC_TEST_LINK_LIBS).
+if (ARROW_BUILD_STATIC)
+  set(ARROW_JEMALLOC_TEST_LINK_LIBS
+    arrow_jemalloc_static)
+else()
+  set(ARROW_JEMALLOC_TEST_LINK_LIBS
+    arrow_jemalloc_shared)
+endif()
+
+set(ARROW_JEMALLOC_SRCS
+ memory_pool.cc
+)
+
+if(NOT APPLE)
+ # Localize thirdparty symbols using a linker version script. This hides them
+ # from the client application. The OS X linker does not support the
+ # version-script option.
+ set(ARROW_JEMALLOC_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map")
+endif()
+
+ADD_ARROW_LIB(arrow_jemalloc
+ SOURCES ${ARROW_JEMALLOC_SRCS}
+ SHARED_LINK_FLAGS ${ARROW_JEMALLOC_LINK_FLAGS}
+ SHARED_LINK_LIBS ${ARROW_JEMALLOC_SHARED_LINK_LIBS}
+ SHARED_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_SHARED_PRIVATE_LINK_LIBS}
+ STATIC_LINK_LIBS ${ARROW_JEMALLOC_STATIC_LINK_LIBS}
+ STATIC_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_STATIC_PRIVATE_LINK_LIBS}
+)
+
+ADD_ARROW_TEST(jemalloc-memory_pool-test)
+ARROW_TEST_LINK_LIBRARIES(jemalloc-memory_pool-test
+ ${ARROW_JEMALLOC_TEST_LINK_LIBS})
+
+ADD_ARROW_BENCHMARK(jemalloc-builder-benchmark)
+ARROW_BENCHMARK_LINK_LIBRARIES(jemalloc-builder-benchmark
+ ${ARROW_JEMALLOC_TEST_LINK_LIBS})
+
+# Headers: top level
+install(FILES
+ memory_pool.h
+ DESTINATION include/arrow/jemalloc)
+
+# pkg-config support
+configure_file(arrow-jemalloc.pc.in
+ "${CMAKE_CURRENT_BINARY_DIR}/arrow-jemalloc.pc"
+ @ONLY)
+install(
+ FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-jemalloc.pc"
+ DESTINATION "lib/pkgconfig/")
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in b/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in
new file mode 100644
index 0000000..0b300fe
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+Name: Apache Arrow jemalloc-based allocator
+Description: jemalloc allocator for Arrow.
+Version: @ARROW_VERSION@
+Libs: -L${libdir} -larrow_jemalloc
+Cflags: -I${includedir}
+Requires: arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
new file mode 100644
index 0000000..58dbaa3
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/builder.h"
+#include "arrow/jemalloc/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+constexpr int64_t kFinalSize = 256;
+
+// Benchmarks Int64Builder on the jemalloc-based memory pool: each timed
+// iteration appends the same chunk kFinalSize times and finishes the array.
+static void BM_BuildPrimitiveArrayNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  // 256 * 1024 int64 values make up one 2 MiB chunk.
+  std::vector<int64_t> chunk(256 * 1024, 100);
+  while (state.KeepRunning()) {
+    Int64Builder builder(jemalloc::MemoryPool::default_pool(), arrow::int64());
+    // kFinalSize chunks of 2 MiB add up to an array of 512 MiB.
+    for (int round = 0; round < kFinalSize; ++round) {
+      builder.Append(chunk.data(), chunk.size(), nullptr);
+    }
+    std::shared_ptr<Array> result;
+    builder.Finish(&result);
+  }
+  // Bytes appended over all iterations, used for the MB/s column.
+  state.SetBytesProcessed(
+      state.iterations() * chunk.size() * sizeof(int64_t) * kFinalSize);
+}
+
+BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc b/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc
new file mode 100644
index 0000000..a8448ab
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <limits>
+
+#include "gtest/gtest.h"
+
+#include "arrow/jemalloc/memory_pool.h"
+#include "arrow/memory_pool-test.h"
+
+namespace arrow {
+namespace jemalloc {
+namespace test {
+
+// Test fixture that runs the shared TestMemoryPoolBase checks against the
+// jemalloc-based pool.
+class TestJemallocMemoryPool : public ::arrow::test::TestMemoryPoolBase {
+ public:
+ // Supplies the pool under test: the jemalloc default pool.
+ ::arrow::MemoryPool* memory_pool() override {
+ return ::arrow::jemalloc::MemoryPool::default_pool();
+ }
+};
+
+// Each test delegates to the helper of the same name on the fixture's base
+// class, which exercises the pool returned by memory_pool().
+TEST_F(TestJemallocMemoryPool, MemoryTracking) {
+ this->TestMemoryTracking();
+}
+
+TEST_F(TestJemallocMemoryPool, OOM) {
+ this->TestOOM();
+}
+
+TEST_F(TestJemallocMemoryPool, Reallocate) {
+ this->TestReallocate();
+}
+
+} // namespace test
+} // namespace jemalloc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/memory_pool.cc b/cpp/src/arrow/jemalloc/memory_pool.cc
new file mode 100644
index 0000000..acc09c7
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/memory_pool.cc
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/jemalloc/memory_pool.h"
+
+#include <sstream>
+
+#include <jemalloc/jemalloc.h>
+
+#include "arrow/status.h"
+
+constexpr size_t kAlignment = 64;
+
+namespace arrow {
+namespace jemalloc {
+
+// Returns the shared jemalloc-backed pool. The instance is a function-local
+// static, so every call yields the same object.
+MemoryPool* MemoryPool::default_pool() {
+  static MemoryPool default_instance;
+  return &default_instance;
+}
+
+// A new pool starts with zero bytes accounted as allocated.
+MemoryPool::MemoryPool() : allocated_size_(0) {}
+
+// The destructor performs no work of its own.
+MemoryPool::~MemoryPool() {}
+
+// Requests `size` bytes aligned to kAlignment from jemalloc's mallocx() and
+// adds them to the byte counter. *out always receives mallocx()'s result;
+// when that is null, Status::OutOfMemory is returned and the counter is left
+// unchanged.
+Status MemoryPool::Allocate(int64_t size, uint8_t** out) {
+  void* raw = mallocx(size, MALLOCX_ALIGN(kAlignment));
+  *out = reinterpret_cast<uint8_t*>(raw);
+  if (raw == nullptr) {
+    std::stringstream message;
+    message << "malloc of size " << size << " failed";
+    return Status::OutOfMemory(message.str());
+  }
+  allocated_size_ += size;
+  return Status::OK();
+}
+
+// Resizes the allocation at *ptr from old_size to new_size bytes with
+// jemalloc's rallocx(), keeping the kAlignment alignment, and adjusts the
+// byte counter by the difference. Returns Status::OutOfMemory when rallocx()
+// fails; in that case *ptr and the counter are left as they were, so the
+// caller still owns the original block.
+Status MemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+  // Hold the result in a temporary: storing a null directly into *ptr would
+  // drop the only reference to the original allocation.
+  uint8_t* resized =
+      reinterpret_cast<uint8_t*>(rallocx(*ptr, new_size, MALLOCX_ALIGN(kAlignment)));
+  if (resized == nullptr) {
+    std::stringstream ss;
+    ss << "realloc of size " << new_size << " failed";
+    return Status::OutOfMemory(ss.str());
+  }
+  *ptr = resized;
+
+  allocated_size_ += new_size - old_size;
+
+  return Status::OK();
+}
+
+// Gives `buffer` back to jemalloc and subtracts `size` from the byte
+// counter. A null `buffer` is accepted and only affects the counter.
+void MemoryPool::Free(uint8_t* buffer, int64_t size) {
+  allocated_size_ -= size;
+  // Release through dallocx(), the counterpart of the mallocx()/rallocx()
+  // calls that produced the block, rather than relying on plain free()
+  // resolving to jemalloc. dallocx() requires a non-null pointer.
+  if (buffer != nullptr) {
+    dallocx(buffer, MALLOCX_ALIGN(kAlignment));
+  }
+}
+
+// Reports the current value of the pool's byte counter.
+int64_t MemoryPool::bytes_allocated() const {
+  const int64_t outstanding = allocated_size_.load();
+  return outstanding;
+}
+
+} // namespace jemalloc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/memory_pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/memory_pool.h b/cpp/src/arrow/jemalloc/memory_pool.h
new file mode 100644
index 0000000..0d32b46
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/memory_pool.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for the jemalloc-based allocator
+
+#ifndef ARROW_JEMALLOC_MEMORY_POOL_H
+#define ARROW_JEMALLOC_MEMORY_POOL_H
+
+#include "arrow/memory_pool.h"
+
+#include <atomic>
+
+namespace arrow {
+
+class Status;
+
+namespace jemalloc {
+
+// MemoryPool implementation backed by jemalloc. Instances cannot be created
+// or copied by clients; the single pool is obtained through default_pool().
+class ARROW_EXPORT MemoryPool : public ::arrow::MemoryPool {
+ public:
+ // Returns the shared jemalloc-based pool.
+ static MemoryPool* default_pool();
+
+ // Not copyable.
+ MemoryPool(MemoryPool const&) = delete;
+ MemoryPool& operator=(MemoryPool const&) = delete;
+
+ virtual ~MemoryPool();
+
+ // Allocates `size` bytes and stores the address in *out.
+ Status Allocate(int64_t size, uint8_t** out) override;
+ // Resizes the block at *ptr from old_size to new_size bytes; *ptr is
+ // updated to the (possibly moved) block.
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
+ // Releases `buffer`; `size` is the number of bytes to subtract from the
+ // pool's byte counter.
+ void Free(uint8_t* buffer, int64_t size) override;
+
+ // Number of bytes currently accounted to this pool.
+ int64_t bytes_allocated() const override;
+
+ private:
+ // Private so that the pool is only reachable through default_pool().
+ MemoryPool();
+
+ // Running total of bytes handed out minus bytes freed.
+ std::atomic<int64_t> allocated_size_;
+};
+
+} // namespace jemalloc
+} // namespace arrow
+
+#endif // ARROW_JEMALLOC_MEMORY_POOL_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/jemalloc/symbols.map
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/jemalloc/symbols.map b/cpp/src/arrow/jemalloc/symbols.map
new file mode 100644
index 0000000..1e87cae
--- /dev/null
+++ b/cpp/src/arrow/jemalloc/symbols.map
@@ -0,0 +1,30 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+
+{
+ # Symbols marked as 'local' are not exported by the DSO and thus may not
+ # be used by client applications.
+ local:
+ # devtoolset / static-libstdc++ symbols
+ __cxa_*;
+
+ extern "C++" {
+ # boost
+ boost::*;
+
+ # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically
+ # links c++11 symbols into binaries so that the result may be executed on
+ # a system with an older libstdc++ which doesn't include the necessary
+ # c++11 symbols.
+ std::*;
+ };
+};
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool-test.cc b/cpp/src/arrow/memory_pool-test.cc
index d6f323d..3daf727 100644
--- a/cpp/src/arrow/memory_pool-test.cc
+++ b/cpp/src/arrow/memory_pool-test.cc
@@ -15,35 +15,28 @@
// specific language governing permissions and limitations
// under the License.
+#include "arrow/memory_pool-test.h"
+
#include <cstdint>
#include <limits>
-#include "gtest/gtest.h"
-
-#include "arrow/memory_pool.h"
-#include "arrow/status.h"
-#include "arrow/test-util.h"
-
namespace arrow {
-TEST(DefaultMemoryPool, MemoryTracking) {
- MemoryPool* pool = default_memory_pool();
+// Fixture that points the shared TestMemoryPoolBase scenarios at the pool
+// returned by ::arrow::default_memory_pool().
+class TestDefaultMemoryPool : public ::arrow::test::TestMemoryPoolBase {
+ public:
+  ::arrow::MemoryPool* memory_pool() override {
+    return ::arrow::default_memory_pool();
+  }
+};
- uint8_t* data;
- ASSERT_OK(pool->Allocate(100, &data));
- EXPECT_EQ(static_cast<uint64_t>(0), reinterpret_cast<uint64_t>(data) % 64);
- ASSERT_EQ(100, pool->bytes_allocated());
-
- pool->Free(data, 100);
- ASSERT_EQ(0, pool->bytes_allocated());
+TEST_F(TestDefaultMemoryPool, MemoryTracking) {
+  TestMemoryTracking();  // scenario inherited from TestMemoryPoolBase
}
-TEST(DefaultMemoryPool, OOM) {
- MemoryPool* pool = default_memory_pool();
+TEST_F(TestDefaultMemoryPool, OOM) {
+  TestOOM();  // scenario inherited from TestMemoryPoolBase
+}
- uint8_t* data;
- int64_t to_alloc = std::numeric_limits<int64_t>::max();
- ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
+TEST_F(TestDefaultMemoryPool, Reallocate) {
+  TestReallocate();  // scenario inherited from TestMemoryPoolBase
}
// Death tests and valgrind are known to not play well 100% of the time. See
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool-test.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool-test.h b/cpp/src/arrow/memory_pool-test.h
new file mode 100644
index 0000000..b9f0337
--- /dev/null
+++ b/cpp/src/arrow/memory_pool-test.h
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include <limits>
+
+#include "arrow/memory_pool.h"
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+namespace test {
+
+// Reusable test scenarios for any ::arrow::MemoryPool implementation.
+//
+// A concrete fixture derives from this class, overrides memory_pool() to return
+// the pool under test and forwards each TEST_F body to one of the Test*()
+// methods.  The scenarios compare bytes_allocated() against absolute values, so
+// they expect the pool to have no outstanding allocations when they start.
+class TestMemoryPoolBase : public ::testing::Test {
+ public:
+  // Supplies the pool that the scenarios below operate on.
+  virtual ::arrow::MemoryPool* memory_pool() = 0;
+
+  // Allocate/Free round trip: the buffer must be 64-byte aligned and the byte
+  // counter must follow the allocation up and back down to zero.
+  void TestMemoryTracking() {
+    auto pool = memory_pool();
+
+    uint8_t* data = nullptr;
+    ASSERT_OK(pool->Allocate(100, &data));
+    EXPECT_EQ(static_cast<uint64_t>(0), Misalignment(data));
+    ASSERT_EQ(100, pool->bytes_allocated());
+
+    pool->Free(data, 100);
+    ASSERT_EQ(0, pool->bytes_allocated());
+  }
+
+  // Requesting the largest representable size must be reported as OutOfMemory.
+  void TestOOM() {
+    auto pool = memory_pool();
+
+    uint8_t* data = nullptr;
+    const int64_t to_alloc = std::numeric_limits<int64_t>::max();
+    ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
+  }
+
+  // Grows and then shrinks a buffer through Reallocate(), checking after each
+  // step that the surviving bytes are intact, that the buffer is still 64-byte
+  // aligned and that the byte counter matches the new size.
+  void TestReallocate() {
+    auto pool = memory_pool();
+
+    uint8_t* data = nullptr;
+    ASSERT_OK(pool->Allocate(10, &data));
+    ASSERT_EQ(10, pool->bytes_allocated());
+    // Marker bytes at both ends of the original ten-byte range.
+    data[0] = 35;
+    data[9] = 12;
+
+    // Expand: the whole original range fits, so both markers must survive.
+    ASSERT_OK(pool->Reallocate(10, 20, &data));
+    EXPECT_EQ(static_cast<uint64_t>(0), Misalignment(data));
+    ASSERT_EQ(data[0], 35);
+    ASSERT_EQ(data[9], 12);
+    ASSERT_EQ(20, pool->bytes_allocated());
+
+    // Shrink: only the first five bytes remain, so only the front marker does.
+    ASSERT_OK(pool->Reallocate(20, 5, &data));
+    EXPECT_EQ(static_cast<uint64_t>(0), Misalignment(data));
+    ASSERT_EQ(data[0], 35);
+    ASSERT_EQ(5, pool->bytes_allocated());
+
+    // Free
+    pool->Free(data, 5);
+    ASSERT_EQ(0, pool->bytes_allocated());
+  }
+
+ private:
+  // Offset of `ptr` past the preceding 64-byte boundary; zero means aligned.
+  static uint64_t Misalignment(const uint8_t* ptr) {
+    return reinterpret_cast<uint64_t>(ptr) % 64;
+  }
+};
+
+} // namespace test
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index f55b1ac..aea5e21 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -17,6 +17,7 @@
#include "arrow/memory_pool.h"
+#include <algorithm>
#include <cstdlib>
#include <mutex>
#include <sstream>
@@ -67,6 +68,7 @@ class InternalMemoryPool : public MemoryPool {
virtual ~InternalMemoryPool();
Status Allocate(int64_t size, uint8_t** out) override;
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override;
void Free(uint8_t* buffer, int64_t size) override;
@@ -85,6 +87,28 @@ Status InternalMemoryPool::Allocate(int64_t size, uint8_t** out) {
return Status::OK();
}
+// Resizes the buffer behind *ptr from old_size to new_size bytes.
+//
+// The resize is always carried out as allocate, copy, free: realloc() is not an
+// option because it gives no alignment guarantee.  When the new allocation
+// fails, RETURN_NOT_OK propagates the error before *ptr or the byte counter has
+// been modified.
+Status InternalMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
+  std::lock_guard<std::mutex> guard(pool_lock_);
+
+  uint8_t* resized;
+  RETURN_NOT_OK(AllocateAligned(new_size, &resized));
+
+  // Carry over only the bytes that exist in both the old and the new buffer.
+  const int64_t preserved = std::min(new_size, old_size);
+  memcpy(resized, *ptr, preserved);
+
+  // Release the old chunk.
+  // NOTE(review): presumably mirrors the per-platform allocator used by
+  // AllocateAligned() -- confirm against its definition.
+#ifdef _MSC_VER
+  _aligned_free(*ptr);
+#else
+  std::free(*ptr);
+#endif
+
+  bytes_allocated_ += new_size - old_size;
+  *ptr = resized;
+
+  return Status::OK();
+}
+
int64_t InternalMemoryPool::bytes_allocated() const {
std::lock_guard<std::mutex> guard(pool_lock_);
return bytes_allocated_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/cpp/src/arrow/memory_pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index 4c1d699..13a3f12 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -31,6 +31,7 @@ class ARROW_EXPORT MemoryPool {
virtual ~MemoryPool();
virtual Status Allocate(int64_t size, uint8_t** out) = 0;
+ virtual Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) = 0;
virtual void Free(uint8_t* buffer, int64_t size) = 0;
virtual int64_t bytes_allocated() const = 0;
http://git-wip-us.apache.org/repos/asf/arrow/blob/5bf6ae49/python/src/pyarrow/common.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/common.cc b/python/src/pyarrow/common.cc
index 8660ac8..0bdd289 100644
--- a/python/src/pyarrow/common.cc
+++ b/python/src/pyarrow/common.cc
@@ -47,6 +47,20 @@ class PyArrowMemoryPool : public arrow::MemoryPool {
return Status::OK();
}
+ // Resizes the buffer behind *ptr from old_size to new_size bytes using
+ // std::realloc.
+ //
+ // On success *ptr refers to the (possibly relocated) buffer and the byte
+ // counter is adjusted by new_size - old_size.  On failure OutOfMemory is
+ // returned and both *ptr and the counter are left untouched, so the caller
+ // still owns the original buffer and can release it.
+ Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
+   // bytes_allocated() reads the counter under pool_lock_, so it has to be
+   // updated under the same lock.
+   std::lock_guard<std::mutex> guard(pool_lock_);
+
+   // realloc(p, 0) is implementation-defined and may free p while returning
+   // null, which cannot be told apart from a failure.  Request one byte
+   // instead so that a zero-sized buffer remains valid and freeable.
+   const size_t request = new_size == 0 ? 1 : static_cast<size_t>(new_size);
+
+   // Keep the result in a temporary: a failed realloc leaves the old block
+   // allocated, and overwriting *ptr with null would leak it.
+   uint8_t* resized = static_cast<uint8_t*>(std::realloc(*ptr, request));
+   if (resized == nullptr) {
+     std::stringstream ss;
+     ss << "realloc of size " << new_size << " failed";
+     return Status::OutOfMemory(ss.str());
+   }
+
+   *ptr = resized;
+   bytes_allocated_ += new_size - old_size;
+
+   return Status::OK();
+ }
+
int64_t bytes_allocated() const override {
std::lock_guard<std::mutex> guard(pool_lock_);
return bytes_allocated_;