You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/07/26 21:48:20 UTC
[arrow] branch master updated: ARROW-6045: [C++] Add benchmark for double and float encoding/decoding, as well as NaN encoding

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ca4f679  ARROW-6045: [C++] Add benchmark for double and float encoding/decoding, as well as NaN encoding
ca4f679 is described below

commit ca4f6793475a465cd4e29568b87e977c4e8c619d
Author: Itamar Turner-Trauring <it...@itamarst.org>
AuthorDate: Fri Jul 26 16:48:06 2019 -0500

    ARROW-6045: [C++] Add benchmark for double and float encoding/decoding, as well as NaN encoding
    
    Was testing a report that NaN encoding was slow. Couldn't reproduce it, but may as well have the benchmark.
    
    Closes #4915 from itamarst/parquet-nan-encoding and squashes the following commits:
    
    bca37aae1 <Itamar Turner-Trauring> Add benchmark for double and float encoding/decoding, as well as NaN encoding for double and float.
    
    Authored-by: Itamar Turner-Trauring <it...@itamarst.org>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 cpp/src/parquet/encoding-benchmark.cc | 83 +++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/cpp/src/parquet/encoding-benchmark.cc b/cpp/src/parquet/encoding-benchmark.cc
index 8960d6b..71659ea 100644
--- a/cpp/src/parquet/encoding-benchmark.cc
+++ b/cpp/src/parquet/encoding-benchmark.cc
@@ -29,6 +29,7 @@
 #include "parquet/platform.h"
 #include "parquet/schema.h"
 
+#include <cmath>
 #include <random>
 
 using arrow::default_memory_pool;
@@ -114,6 +115,88 @@ static void BM_PlainDecodingInt64(benchmark::State& state) {
 
 BENCHMARK(BM_PlainDecodingInt64)->Range(MIN_RANGE, MAX_RANGE);
 
+static void BM_PlainEncodingDouble(benchmark::State& state) {
+  std::vector<double> values(state.range(0), 64.0);
+  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN);
+  for (auto _ : state) {
+    encoder->Put(values.data(), static_cast<int>(values.size()));
+    encoder->FlushValues();
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
+}
+
+BENCHMARK(BM_PlainEncodingDouble)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_PlainEncodingDoubleNaN(benchmark::State& state) {
+  std::vector<double> values(state.range(0), nan(""));
+  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN);
+  for (auto _ : state) {
+    encoder->Put(values.data(), static_cast<int>(values.size()));
+    encoder->FlushValues();
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
+}
+
+BENCHMARK(BM_PlainEncodingDoubleNaN)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_PlainDecodingDouble(benchmark::State& state) {
+  std::vector<double> values(state.range(0), 64.0);
+  auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN);
+  encoder->Put(values.data(), static_cast<int>(values.size()));
+  std::shared_ptr<Buffer> buf = encoder->FlushValues();
+
+  for (auto _ : state) {
+    auto decoder = MakeTypedDecoder<DoubleType>(Encoding::PLAIN);
+    decoder->SetData(static_cast<int>(values.size()), buf->data(),
+                     static_cast<int>(buf->size()));
+    decoder->Decode(values.data(), static_cast<int>(values.size()));
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double));
+}
+
+BENCHMARK(BM_PlainDecodingDouble)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_PlainEncodingFloat(benchmark::State& state) {
+  std::vector<float> values(state.range(0), 64.0);
+  auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN);
+  for (auto _ : state) {
+    encoder->Put(values.data(), static_cast<int>(values.size()));
+    encoder->FlushValues();
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
+}
+
+BENCHMARK(BM_PlainEncodingFloat)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_PlainEncodingFloatNaN(benchmark::State& state) {
+  std::vector<float> values(state.range(0), nanf(""));
+  auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN);
+  for (auto _ : state) {
+    encoder->Put(values.data(), static_cast<int>(values.size()));
+    encoder->FlushValues();
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
+}
+
+BENCHMARK(BM_PlainEncodingFloatNaN)->Range(MIN_RANGE, MAX_RANGE);
+
+static void BM_PlainDecodingFloat(benchmark::State& state) {
+  std::vector<float> values(state.range(0), 64.0);
+  auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN);
+  encoder->Put(values.data(), static_cast<int>(values.size()));
+  std::shared_ptr<Buffer> buf = encoder->FlushValues();
+
+  for (auto _ : state) {
+    auto decoder = MakeTypedDecoder<FloatType>(Encoding::PLAIN);
+    decoder->SetData(static_cast<int>(values.size()), buf->data(),
+                     static_cast<int>(buf->size()));
+    decoder->Decode(values.data(), static_cast<int>(values.size()));
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float));
+}
+
+BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE);
+
 template <typename Type>
 static void DecodeDict(std::vector<typename Type::c_type>& values,
                        benchmark::State& state) {