You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fs...@apache.org on 2020/04/23 00:37:27 UTC

[arrow] branch master updated: ARROW-8540: [C++] Add memory allocation benchmarks

This is an automated email from the ASF dual-hosted git repository.

fsaintjacques pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c22b6c4  ARROW-8540: [C++] Add memory allocation benchmarks
c22b6c4 is described below

commit c22b6c4921aacfceafa853b584d84cd5c7e33ed4
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Apr 22 20:36:57 2020 -0400

    ARROW-8540: [C++] Add memory allocation benchmarks
    
    Example output:
    ```
    -------------------------------------------------------------------------------------------------------
    Benchmark                                                             Time             CPU   Iterations
    -------------------------------------------------------------------------------------------------------
    TouchArea/size:4096/real_time                                      20.1 ns         20.1 ns     34893671
    TouchArea/size:65536/real_time                                      483 ns          483 ns      1448647
    TouchArea/size:1048576/real_time                                   7670 ns         7669 ns        90816
    TouchArea/size:16777216/real_time                                124297 ns       124280 ns         5611
    
    AllocateDeallocate<SystemAlloc>/size:4096/real_time                18.6 ns         18.6 ns     37781939
    AllocateDeallocate<SystemAlloc>/size:65536/real_time                161 ns          161 ns      4360765
    AllocateDeallocate<SystemAlloc>/size:1048576/real_time              328 ns          328 ns      2131288
    AllocateDeallocate<SystemAlloc>/size:16777216/real_time             160 ns          160 ns      4366862
    AllocateTouchDeallocate<SystemAlloc>/size:4096/real_time           40.4 ns         40.4 ns     17333165
    AllocateTouchDeallocate<SystemAlloc>/size:65536/real_time           640 ns          640 ns      1092988
    AllocateTouchDeallocate<SystemAlloc>/size:1048576/real_time        7959 ns         7958 ns        87693
    AllocateTouchDeallocate<SystemAlloc>/size:16777216/real_time     124816 ns       124801 ns         5602
    
    AllocateDeallocate<Jemalloc>/size:4096/real_time                   22.2 ns         22.2 ns     31611774
    AllocateDeallocate<Jemalloc>/size:65536/real_time                   157 ns          157 ns      4460745
    AllocateDeallocate<Jemalloc>/size:1048576/real_time                 330 ns          330 ns      2113808
    AllocateDeallocate<Jemalloc>/size:16777216/real_time                158 ns          158 ns      4439623
    AllocateTouchDeallocate<Jemalloc>/size:4096/real_time              43.0 ns         43.0 ns     16252256
    AllocateTouchDeallocate<Jemalloc>/size:65536/real_time              638 ns          638 ns      1091897
    AllocateTouchDeallocate<Jemalloc>/size:1048576/real_time           7961 ns         7960 ns        87755
    AllocateTouchDeallocate<Jemalloc>/size:16777216/real_time        124699 ns       124682 ns         5588
    
    AllocateDeallocate<Mimalloc>/size:4096/real_time                    232 ns          232 ns      3015215
    AllocateDeallocate<Mimalloc>/size:65536/real_time                   153 ns          153 ns      4527945
    AllocateDeallocate<Mimalloc>/size:1048576/real_time                 146 ns          146 ns      4720662
    AllocateDeallocate<Mimalloc>/size:16777216/real_time                144 ns          144 ns      4859165
    AllocateTouchDeallocate<Mimalloc>/size:4096/real_time               254 ns          254 ns      2750031
    AllocateTouchDeallocate<Mimalloc>/size:65536/real_time              635 ns          635 ns      1100267
    AllocateTouchDeallocate<Mimalloc>/size:1048576/real_time           7753 ns         7752 ns        89887
    AllocateTouchDeallocate<Mimalloc>/size:16777216/real_time        124518 ns       124501 ns         5604
    ```
    
    Closes #6997 from pitrou/ARROW-8540-pool-benchmark
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: François Saint-Jacques <fs...@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt           |   1 +
 cpp/src/arrow/memory_pool_benchmark.cc | 129 +++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 5d4725f..e888716 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -551,6 +551,7 @@ if(ARROW_COMPUTE)
 endif()
 
 add_arrow_benchmark(builder_benchmark)
+add_arrow_benchmark(memory_pool_benchmark)
 add_arrow_benchmark(type_benchmark)
 
 #
diff --git a/cpp/src/arrow/memory_pool_benchmark.cc b/cpp/src/arrow/memory_pool_benchmark.cc
new file mode 100644
index 0000000..ba39310
--- /dev/null
+++ b/cpp/src/arrow/memory_pool_benchmark.cc
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/util/logging.h"
+
+#include "benchmark/benchmark.h"
+
+namespace arrow {
+
+struct SystemAlloc {  // benchmark template argument wrapping system_memory_pool()
+  static Result<MemoryPool*> GetAllocator() { return system_memory_pool(); }
+};
+
+#ifdef ARROW_JEMALLOC
+struct Jemalloc {  // benchmark template argument wrapping jemalloc_memory_pool()
+  static Result<MemoryPool*> GetAllocator() {
+    MemoryPool* pool;  // out-parameter passed to jemalloc_memory_pool() below
+    RETURN_NOT_OK(jemalloc_memory_pool(&pool));  // presumably propagates a failed Status
+    return pool;
+  }
+};
+#endif
+
+#ifdef ARROW_MIMALLOC
+struct Mimalloc {  // benchmark template argument wrapping mimalloc_memory_pool()
+  static Result<MemoryPool*> GetAllocator() {
+    MemoryPool* pool;  // out-parameter passed to mimalloc_memory_pool() below
+    RETURN_NOT_OK(mimalloc_memory_pool(&pool));  // presumably propagates a failed Status
+    return pool;
+  }
+};
+#endif
+
+static void TouchCacheLines(uint8_t* data, int64_t nbytes) {  // reads data, never writes
+  uint8_t total = 0;  // running sum of the bytes that were read
+  while (nbytes > 0) {  // performs no reads at all when nbytes <= 0
+    total += *data;  // load one byte at the current position
+    data += 64;  // stride of 64 bytes (presumably one cache line; confirm per target)
+    nbytes -= 64;
+  }
+  benchmark::DoNotOptimize(total);  // keeps the loads from being eliminated as dead code
+}
+
+// Benchmark the cost of repeatedly reading the same, already allocated memory area.
+// This gives us a lower bound of the potential difference between
+// AllocateTouchDeallocate and AllocateDeallocate.
+static void TouchArea(benchmark::State& state) {  // NOLINT non-const reference
+  const int64_t nbytes = state.range(0);  // byte count from the "size" argument
+  MemoryPool* pool = default_memory_pool();
+  uint8_t* data;
+  ARROW_CHECK_OK(pool->Allocate(nbytes, &data));  // allocated once, before the loop
+
+  for (auto _ : state) {
+    TouchCacheLines(data, nbytes);  // same buffer on every iteration
+  }
+
+  pool->Free(data, nbytes);  // released once, after the loop
+}
+
+// Benchmark the raw cost of allocating and immediately freeing memory.
+// Note this is a best case situation: we always allocate and deallocate exactly
+// the same size, without any other allocator traffic.  However, it can be
+// representative of workloads where we routinely create and destroy
+// temporary buffers for intermediate computation results.
+template <typename Alloc>
+static void AllocateDeallocate(benchmark::State& state) {  // NOLINT non-const reference
+  const int64_t nbytes = state.range(0);  // byte count from the "size" argument
+  MemoryPool* pool = *Alloc::GetAllocator();  // NOTE(review): Result not checked
+
+  for (auto _ : state) {
+    uint8_t* data;
+    ARROW_CHECK_OK(pool->Allocate(nbytes, &data));
+    pool->Free(data, nbytes);  // freed right away; the buffer contents are never accessed
+  }
+}
+
+// Benchmark the cost of allocating memory, reading through it once, and freeing it.
+template <typename Alloc>
+static void AllocateTouchDeallocate(
+    benchmark::State& state) {  // NOLINT non-const reference
+  const int64_t nbytes = state.range(0);  // byte count from the "size" argument
+  MemoryPool* pool = *Alloc::GetAllocator();  // NOTE(review): Result not checked
+
+  for (auto _ : state) {
+    uint8_t* data;
+    ARROW_CHECK_OK(pool->Allocate(nbytes, &data));
+    TouchCacheLines(data, nbytes);  // reads the fresh buffer without writing to it first
+    pool->Free(data, nbytes);
+  }
+}
+
+#define BENCHMARK_ALLOCATE_ARGS /* shared "size" range and timing settings */ \
+  ->RangeMultiplier(16)->Range(4096, 16 * 1024 * 1024)->ArgName("size")->UseRealTime()
+
+#define BENCHMARK_ALLOCATE(benchmark_func, template_param) /* adds the shared args */ \
+  BENCHMARK_TEMPLATE(benchmark_func, template_param) BENCHMARK_ALLOCATE_ARGS
+
+BENCHMARK(TouchArea) BENCHMARK_ALLOCATE_ARGS;  // baseline: no allocation inside the loop
+
+BENCHMARK_ALLOCATE(AllocateDeallocate, SystemAlloc);
+BENCHMARK_ALLOCATE(AllocateTouchDeallocate, SystemAlloc);
+
+#ifdef ARROW_JEMALLOC
+BENCHMARK_ALLOCATE(AllocateDeallocate, Jemalloc);
+BENCHMARK_ALLOCATE(AllocateTouchDeallocate, Jemalloc);
+#endif  // ARROW_JEMALLOC
+
+#ifdef ARROW_MIMALLOC
+BENCHMARK_ALLOCATE(AllocateDeallocate, Mimalloc);
+BENCHMARK_ALLOCATE(AllocateTouchDeallocate, Mimalloc);
+#endif  // ARROW_MIMALLOC
+
+}  // namespace arrow