You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2023/01/03 18:04:14 UTC
[arrow] branch master updated: GH-14951: [C++][Parquet] Add benchmarks for DELTA_BINARY_PACKED encoding (#15140)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8ed4513cec GH-14951: [C++][Parquet] Add benchmarks for DELTA_BINARY_PACKED encoding (#15140)
8ed4513cec is described below
commit 8ed4513cecc066148ac782cc5bb94738e51318ad
Author: mwish <an...@qq.com>
AuthorDate: Wed Jan 4 02:04:08 2023 +0800
GH-14951: [C++][Parquet] Add benchmarks for DELTA_BINARY_PACKED encoding (#15140)
This patch support benchmark for DELTA_BINARY_PACKED. Different from PLAIN, it should considering the cases that data can or cannot be well compressed
* Closes: #14951
Lead-authored-by: mwish <an...@qq.com>
Co-authored-by: mwish <ma...@gmail.com>
Co-authored-by: Antoine Pitrou <an...@python.org>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/parquet/encoding_benchmark.cc | 121 +++++++++++++++++++++++++++++++++-
1 file changed, 119 insertions(+), 2 deletions(-)
diff --git a/cpp/src/parquet/encoding_benchmark.cc b/cpp/src/parquet/encoding_benchmark.cc
index b02c7eaa46..e6a3c2c58c 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -31,6 +31,7 @@
#include "parquet/schema.h"
#include <cmath>
+#include <limits>
#include <random>
using arrow::default_memory_pool;
@@ -452,6 +453,122 @@ BENCHMARK(BM_ByteStreamSplitEncode_Float_Avx512)->Range(MIN_RANGE, MAX_RANGE);
BENCHMARK(BM_ByteStreamSplitEncode_Double_Avx512)->Range(MIN_RANGE, MAX_RANGE);
#endif
+template <typename DType>
+static auto MakeDeltaBitPackingInputFixed(size_t length) {
+ using T = typename DType::c_type;
+ return std::vector<T>(length, 42);
+}
+
+template <typename DType>
+static auto MakeDeltaBitPackingInputNarrow(size_t length) {
+ using T = typename DType::c_type;
+ auto numbers = std::vector<T>(length);
+ ::arrow::randint<T, T>(length, 0, 1000, &numbers);
+ return numbers;
+}
+
+template <typename DType>
+static auto MakeDeltaBitPackingInputWide(size_t length) {
+ using T = typename DType::c_type;
+ auto numbers = std::vector<T>(length);
+ ::arrow::randint<T, T>(length, std::numeric_limits<T>::min() >> 2,
+ std::numeric_limits<T>::max() >> 2, &numbers);
+ return numbers;
+}
+
+template <typename DType, typename NumberGenerator>
+static void BM_DeltaBitPackingEncode(benchmark::State& state, NumberGenerator gen) {
+ using T = typename DType::c_type;
+ std::vector<T> values = gen(state.range(0));
+ auto encoder = MakeTypedEncoder<DType>(Encoding::DELTA_BINARY_PACKED);
+ for (auto _ : state) {
+ encoder->Put(values.data(), static_cast<int>(values.size()));
+ encoder->FlushValues();
+ }
+ state.SetBytesProcessed(state.iterations() * values.size() * sizeof(T));
+ state.SetItemsProcessed(state.iterations() * values.size());
+}
+
+static void BM_DeltaBitPackingEncode_Int32_Fixed(benchmark::State& state) {
+ BM_DeltaBitPackingEncode<Int32Type>(state, MakeDeltaBitPackingInputFixed<Int32Type>);
+}
+
+static void BM_DeltaBitPackingEncode_Int64_Fixed(benchmark::State& state) {
+ BM_DeltaBitPackingEncode<Int64Type>(state, MakeDeltaBitPackingInputFixed<Int64Type>);
+}
+
+static void BM_DeltaBitPackingEncode_Int32_Narrow(benchmark::State& state) {
+ BM_DeltaBitPackingEncode<Int32Type>(state, MakeDeltaBitPackingInputNarrow<Int32Type>);
+}
+
+static void BM_DeltaBitPackingEncode_Int64_Narrow(benchmark::State& state) {
+ BM_DeltaBitPackingEncode<Int64Type>(state, MakeDeltaBitPackingInputNarrow<Int64Type>);
+}
+
+static void BM_DeltaBitPackingEncode_Int32_Wide(benchmark::State& state) {
+ BM_DeltaBitPackingEncode<Int32Type>(state, MakeDeltaBitPackingInputWide<Int32Type>);
+}
+
+static void BM_DeltaBitPackingEncode_Int64_Wide(benchmark::State& state) {
+ BM_DeltaBitPackingEncode<Int64Type>(state, MakeDeltaBitPackingInputWide<Int64Type>);
+}
+
+BENCHMARK(BM_DeltaBitPackingEncode_Int32_Fixed)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingEncode_Int64_Fixed)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingEncode_Int32_Narrow)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingEncode_Int64_Narrow)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingEncode_Int32_Wide)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingEncode_Int64_Wide)->Range(MIN_RANGE, MAX_RANGE);
+
+template <typename DType, typename NumberGenerator>
+static void BM_DeltaBitPackingDecode(benchmark::State& state, NumberGenerator gen) {
+ using T = typename DType::c_type;
+ std::vector<T> values = gen(state.range(0));
+ auto encoder = MakeTypedEncoder<DType>(Encoding::DELTA_BINARY_PACKED);
+ encoder->Put(values.data(), static_cast<int>(values.size()));
+ std::shared_ptr<Buffer> buf = encoder->FlushValues();
+
+ for (auto _ : state) {
+ auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BINARY_PACKED);
+ decoder->SetData(static_cast<int>(values.size()), buf->data(),
+ static_cast<int>(buf->size()));
+ decoder->Decode(values.data(), static_cast<int>(values.size()));
+ }
+ state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T));
+ state.SetItemsProcessed(state.iterations() * state.range(0));
+}
+
+static void BM_DeltaBitPackingDecode_Int32_Fixed(benchmark::State& state) {
+ BM_DeltaBitPackingDecode<Int32Type>(state, MakeDeltaBitPackingInputFixed<Int32Type>);
+}
+
+static void BM_DeltaBitPackingDecode_Int64_Fixed(benchmark::State& state) {
+ BM_DeltaBitPackingDecode<Int64Type>(state, MakeDeltaBitPackingInputFixed<Int64Type>);
+}
+
+static void BM_DeltaBitPackingDecode_Int32_Narrow(benchmark::State& state) {
+ BM_DeltaBitPackingDecode<Int32Type>(state, MakeDeltaBitPackingInputNarrow<Int32Type>);
+}
+
+static void BM_DeltaBitPackingDecode_Int64_Narrow(benchmark::State& state) {
+ BM_DeltaBitPackingDecode<Int64Type>(state, MakeDeltaBitPackingInputNarrow<Int64Type>);
+}
+
+static void BM_DeltaBitPackingDecode_Int32_Wide(benchmark::State& state) {
+ BM_DeltaBitPackingDecode<Int32Type>(state, MakeDeltaBitPackingInputWide<Int32Type>);
+}
+
+static void BM_DeltaBitPackingDecode_Int64_Wide(benchmark::State& state) {
+ BM_DeltaBitPackingDecode<Int64Type>(state, MakeDeltaBitPackingInputWide<Int64Type>);
+}
+
+BENCHMARK(BM_DeltaBitPackingDecode_Int32_Fixed)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingDecode_Int64_Fixed)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingDecode_Int32_Narrow)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingDecode_Int64_Narrow)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingDecode_Int32_Wide)->Range(MIN_RANGE, MAX_RANGE);
+BENCHMARK(BM_DeltaBitPackingDecode_Int64_Wide)->Range(MIN_RANGE, MAX_RANGE);
+
template <typename Type>
static void DecodeDict(std::vector<typename Type::c_type>& values,
benchmark::State& state) {
@@ -538,8 +655,8 @@ class BenchmarkDecodeArrow : public ::benchmark::Fixture {
}
void InitDataInputs() {
- // Generate a random string dictionary without any nulls so that this dataset can be
- // used for benchmarking the DecodeArrowNonNull API
+ // Generate a random string dictionary without any nulls so that this dataset can
+ // be used for benchmarking the DecodeArrowNonNull API
constexpr int repeat_factor = 8;
constexpr int64_t min_length = 2;
constexpr int64_t max_length = 10;