You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/05/19 07:52:16 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #7213: ARROW-8841: [C++] Add benchmark and unittest for encoding::PLAIN spaced

pitrou commented on a change in pull request #7213:
URL: https://github.com/apache/arrow/pull/7213#discussion_r427099420



##########
File path: cpp/src/parquet/encoding_benchmark.cc
##########
@@ -199,6 +200,130 @@ static void BM_PlainDecodingFloat(benchmark::State& state) {
 
 BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE);
 
+template <typename ParquetType>
+struct BM_SpacedEncodingTraits;
+
+template <>
+struct BM_SpacedEncodingTraits<BooleanType> {
+  // Leverage UInt8 vector array data for Boolean, the input src of PutSpaced is bool*
+  using ArrowType = ::arrow::UInt8Type;
+  using ArrayType = ::arrow::UInt8Array;
+  using CType = bool;
+};
+
+template <>
+struct BM_SpacedEncodingTraits<FloatType> {
+  using ArrowType = typename EncodingTraits<FloatType>::ArrowType;
+  using ArrayType = typename arrow::TypeTraits<ArrowType>::ArrayType;
+  using CType = typename FloatType::c_type;
+};
+
+template <>
+struct BM_SpacedEncodingTraits<DoubleType> {
+  using ArrowType = typename EncodingTraits<DoubleType>::ArrowType;
+  using ArrayType = typename arrow::TypeTraits<ArrowType>::ArrayType;
+  using CType = typename DoubleType::c_type;
+};
+
+static void BM_PlainSpacedArgs(benchmark::internal::Benchmark* bench) {
+  static const auto BM_kPlainSpacedSize =
+      arrow::internal::CpuInfo::GetInstance()->CacheSize(
+          arrow::internal::CpuInfo::L1_CACHE);
+
+  bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/1});
+  bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/10});
+  bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/50});
+  bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/90});
+  bench->Args({/*size*/ BM_kPlainSpacedSize, /*null_percentage=*/99});
+}
+
+template <typename ParquetType>
+static void BM_PlainEncodingSpaced(benchmark::State& state) {
+  using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType;
+  using ArrayType = typename BM_SpacedEncodingTraits<ParquetType>::ArrayType;
+  using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType;
+
+  const auto num_values = state.range(0);
+  const double null_percent = static_cast<double>(state.range(1)) / 100.0;
+
+  auto rand = ::arrow::random::RandomArrayGenerator(1923);
+  const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
+  const auto valid_bits = array->null_bitmap_data();
+  const auto array_actual = arrow::internal::checked_pointer_cast<ArrayType>(array);
+
+  auto encoder = MakeTypedEncoder<ParquetType>(Encoding::PLAIN);
+  for (auto _ : state) {
+    // Cast only happens for special BooleanType as it use UInt8 for the array data to
+    // match a bool* input to PutSpaced.
+    encoder->PutSpaced(reinterpret_cast<const CType*>(array_actual->raw_values()),
+                       num_values, valid_bits, 0);
+    encoder->FlushValues();
+  }
+  state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType));
+}
+
+static void BM_PlainEncodingSpacedBoolean(benchmark::State& state) {
+  BM_PlainEncodingSpaced<BooleanType>(state);
+}
+BENCHMARK(BM_PlainEncodingSpacedBoolean)->Apply(BM_PlainSpacedArgs);
+
+static void BM_PlainEncodingSpacedFloat(benchmark::State& state) {
+  BM_PlainEncodingSpaced<FloatType>(state);
+}
+BENCHMARK(BM_PlainEncodingSpacedFloat)->Apply(BM_PlainSpacedArgs);
+
+static void BM_PlainEncodingSpacedDouble(benchmark::State& state) {
+  BM_PlainEncodingSpaced<DoubleType>(state);
+}
+BENCHMARK(BM_PlainEncodingSpacedDouble)->Apply(BM_PlainSpacedArgs);
+
+template <typename ParquetType>
+static void BM_PlainDecodingSpaced(benchmark::State& state) {
+  using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType;
+  using ArrayType = typename BM_SpacedEncodingTraits<ParquetType>::ArrayType;
+  using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType;
+
+  const auto num_values = state.range(0);
+  const double null_percent = static_cast<double>(state.range(1)) / 100.0;
+
+  auto rand = ::arrow::random::RandomArrayGenerator(1923);
+  const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
+  const auto valid_bits = array->null_bitmap_data();
+  const auto null_count = array->null_count();
+  const auto array_actual = arrow::internal::checked_pointer_cast<ArrayType>(array);
+
+  auto encoder = MakeTypedEncoder<ParquetType>(Encoding::PLAIN);
+  // Cast only happens for special BooleanType as it use UInt8 for the array data to match
+  // a bool* input to PutSpaced.

Review comment:
       Is `bool` guaranteed to have the same size and representation as `uint8_t`? The cast to `bool*` relies on that.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org