You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fs...@apache.org on 2020/04/23 00:37:27 UTC
[arrow] branch master updated: ARROW-8540: [C++] Add memory
allocation benchmarks
This is an automated email from the ASF dual-hosted git repository.
fsaintjacques pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c22b6c4 ARROW-8540: [C++] Add memory allocation benchmarks
c22b6c4 is described below
commit c22b6c4921aacfceafa853b584d84cd5c7e33ed4
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Apr 22 20:36:57 2020 -0400
ARROW-8540: [C++] Add memory allocation benchmarks
Example output:
```
-------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------------------------
TouchArea/size:4096/real_time 20.1 ns 20.1 ns 34893671
TouchArea/size:65536/real_time 483 ns 483 ns 1448647
TouchArea/size:1048576/real_time 7670 ns 7669 ns 90816
TouchArea/size:16777216/real_time 124297 ns 124280 ns 5611
AllocateDeallocate<SystemAlloc>/size:4096/real_time 18.6 ns 18.6 ns 37781939
AllocateDeallocate<SystemAlloc>/size:65536/real_time 161 ns 161 ns 4360765
AllocateDeallocate<SystemAlloc>/size:1048576/real_time 328 ns 328 ns 2131288
AllocateDeallocate<SystemAlloc>/size:16777216/real_time 160 ns 160 ns 4366862
AllocateTouchDeallocate<SystemAlloc>/size:4096/real_time 40.4 ns 40.4 ns 17333165
AllocateTouchDeallocate<SystemAlloc>/size:65536/real_time 640 ns 640 ns 1092988
AllocateTouchDeallocate<SystemAlloc>/size:1048576/real_time 7959 ns 7958 ns 87693
AllocateTouchDeallocate<SystemAlloc>/size:16777216/real_time 124816 ns 124801 ns 5602
AllocateDeallocate<Jemalloc>/size:4096/real_time 22.2 ns 22.2 ns 31611774
AllocateDeallocate<Jemalloc>/size:65536/real_time 157 ns 157 ns 4460745
AllocateDeallocate<Jemalloc>/size:1048576/real_time 330 ns 330 ns 2113808
AllocateDeallocate<Jemalloc>/size:16777216/real_time 158 ns 158 ns 4439623
AllocateTouchDeallocate<Jemalloc>/size:4096/real_time 43.0 ns 43.0 ns 16252256
AllocateTouchDeallocate<Jemalloc>/size:65536/real_time 638 ns 638 ns 1091897
AllocateTouchDeallocate<Jemalloc>/size:1048576/real_time 7961 ns 7960 ns 87755
AllocateTouchDeallocate<Jemalloc>/size:16777216/real_time 124699 ns 124682 ns 5588
AllocateDeallocate<Mimalloc>/size:4096/real_time 232 ns 232 ns 3015215
AllocateDeallocate<Mimalloc>/size:65536/real_time 153 ns 153 ns 4527945
AllocateDeallocate<Mimalloc>/size:1048576/real_time 146 ns 146 ns 4720662
AllocateDeallocate<Mimalloc>/size:16777216/real_time 144 ns 144 ns 4859165
AllocateTouchDeallocate<Mimalloc>/size:4096/real_time 254 ns 254 ns 2750031
AllocateTouchDeallocate<Mimalloc>/size:65536/real_time 635 ns 635 ns 1100267
AllocateTouchDeallocate<Mimalloc>/size:1048576/real_time 7753 ns 7752 ns 89887
AllocateTouchDeallocate<Mimalloc>/size:16777216/real_time 124518 ns 124501 ns 5604
```
Closes #6997 from pitrou/ARROW-8540-pool-benchmark
Authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: François Saint-Jacques <fs...@gmail.com>
---
cpp/src/arrow/CMakeLists.txt | 1 +
cpp/src/arrow/memory_pool_benchmark.cc | 129 +++++++++++++++++++++++++++++++++
2 files changed, 130 insertions(+)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 5d4725f..e888716 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -551,6 +551,7 @@ if(ARROW_COMPUTE)
endif()
add_arrow_benchmark(builder_benchmark)
+add_arrow_benchmark(memory_pool_benchmark)
add_arrow_benchmark(type_benchmark)
#
diff --git a/cpp/src/arrow/memory_pool_benchmark.cc b/cpp/src/arrow/memory_pool_benchmark.cc
new file mode 100644
index 0000000..ba39310
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_benchmark.cc
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/util/logging.h"
+
+#include "benchmark/benchmark.h"
+
+namespace arrow {
+
+struct SystemAlloc {  // Allocator policy: benchmarks use system_memory_pool()
+ static Result<MemoryPool*> GetAllocator() { return system_memory_pool(); }
+};
+
+#ifdef ARROW_JEMALLOC
+struct Jemalloc {  // Allocator policy: benchmarks use jemalloc_memory_pool()
+  static Result<MemoryPool*> GetAllocator() {
+    MemoryPool* jemalloc_pool;
+    RETURN_NOT_OK(jemalloc_memory_pool(&jemalloc_pool));
+    return jemalloc_pool;
+  }
+};
+#endif
+
+#ifdef ARROW_MIMALLOC
+struct Mimalloc {  // Allocator policy: benchmarks use mimalloc_memory_pool()
+  static Result<MemoryPool*> GetAllocator() {
+    MemoryPool* mimalloc_pool;
+    RETURN_NOT_OK(mimalloc_memory_pool(&mimalloc_pool));
+    return mimalloc_pool;
+  }
+};
+#endif
+
+// Load one byte out of every 64 across the nbytes-long area starting at data.
+static void TouchCacheLines(uint8_t* data, int64_t nbytes) {
+  constexpr int64_t kStride = 64;  // presumably the cache line size
+  uint8_t checksum = 0;
+  for (int64_t offset = 0; offset < nbytes; offset += kStride) {
+    checksum += data[offset];
+  }
+  benchmark::DoNotOptimize(checksum);  // keep the loads from being elided
+}
+
+// Baseline: repeatedly touch one buffer from the default pool that is allocated
+// once, before the benchmark loop.  This gives a lower bound on the potential
+// difference between AllocateTouchDeallocate and AllocateDeallocate.
+static void TouchArea(benchmark::State& state) {  // NOLINT non-const reference
+  MemoryPool* const allocator = default_memory_pool();
+  const int64_t area_size = state.range(0);
+  uint8_t* area;
+  ARROW_CHECK_OK(allocator->Allocate(area_size, &area));
+
+  for (auto _ : state) {
+    TouchCacheLines(area, area_size);
+  }
+
+  allocator->Free(area, area_size);
+}
+
+// Measure a bare Allocate() / Free() round trip on the pool chosen by Alloc.
+// This is a best-case scenario: every iteration requests and releases exactly
+// the same size, and there is no other allocator traffic.  It can still be
+// representative of workloads that routinely create and destroy temporary
+// buffers holding intermediate computation results.
+template <typename Alloc>
+static void AllocateDeallocate(benchmark::State& state) {  // NOLINT non-const reference
+  MemoryPool* const allocator = *Alloc::GetAllocator();
+  const int64_t alloc_size = state.range(0);
+
+  for (auto _ : state) {
+    uint8_t* buffer;
+    ARROW_CHECK_OK(allocator->Allocate(alloc_size, &buffer));
+    allocator->Free(buffer, alloc_size);
+  }
+}
+
+// Measure an Allocate() / Free() round trip on the pool chosen by Alloc where
+// the fresh buffer is read through TouchCacheLines() before being released.
+template <typename Alloc>
+static void AllocateTouchDeallocate(
+    benchmark::State& state) {  // NOLINT non-const reference
+  MemoryPool* const allocator = *Alloc::GetAllocator();
+  const int64_t alloc_size = state.range(0);
+  for (auto _ : state) {
+    uint8_t* buffer;
+    ARROW_CHECK_OK(allocator->Allocate(alloc_size, &buffer));
+    TouchCacheLines(buffer, alloc_size);
+    allocator->Free(buffer, alloc_size);
+  }
+}
+
+#define BENCHMARK_ALLOCATE_ARGS \
+ ->RangeMultiplier(16)->Range(4096, 16 * 1024 * 1024)->ArgName("size")->UseRealTime()
+// Register benchmark_func<template_param> with the "size" arguments defined above.
+#define BENCHMARK_ALLOCATE(benchmark_func, template_param) \
+ BENCHMARK_TEMPLATE(benchmark_func, template_param) BENCHMARK_ALLOCATE_ARGS
+
+BENCHMARK(TouchArea) BENCHMARK_ALLOCATE_ARGS;  // baseline on the default pool
+// The system allocator benchmarks are registered unconditionally.
+BENCHMARK_ALLOCATE(AllocateDeallocate, SystemAlloc);
+BENCHMARK_ALLOCATE(AllocateTouchDeallocate, SystemAlloc);
+// The jemalloc benchmarks are registered only when ARROW_JEMALLOC is defined.
+#ifdef ARROW_JEMALLOC
+BENCHMARK_ALLOCATE(AllocateDeallocate, Jemalloc);
+BENCHMARK_ALLOCATE(AllocateTouchDeallocate, Jemalloc);
+#endif
+// The mimalloc benchmarks are registered only when ARROW_MIMALLOC is defined.
+#ifdef ARROW_MIMALLOC
+BENCHMARK_ALLOCATE(AllocateDeallocate, Mimalloc);
+BENCHMARK_ALLOCATE(AllocateTouchDeallocate, Mimalloc);
+#endif
+
+} // namespace arrow