You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/06/14 17:32:33 UTC

[kudu] 01/03: KUDU-2846 (part 2): optimize IS [NOT] NULL predicates

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch branch-1.10.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit bd66430d80f7f400567b178af73c7688cea50cb8
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Thu Jun 13 11:19:36 2019 -0700

    KUDU-2846 (part 2): optimize IS [NOT] NULL predicates
    
    This does a simple optimization to use bitwise ops on the selection and
    null vectors to implement NOT NULL and NULL, rather than the current
    branchy code. The speedup is about 75x.
    
    Before:
      int32  NULL       (c IS NULL) 615.9M evals/sec       4.54 cycles/eval
      int32  NOT NULL   (c IS NOT NULL) 636.3M evals/sec   4.39 cycles/eval
      int32  NULL       (c IS NOT NULL) 645.3M evals/sec   4.33 cycles/eval
    
    After:
      int32  NULL       (c IS NULL) 46811.4M evals/sec     0.03 cycles/eval
      int32  NOT NULL   (c IS NOT NULL) 45706.1M evals/sec 0.03 cycles/eval
      int32  NULL       (c IS NOT NULL) 48855.0M evals/sec 0.03 cycles/eval
    
    Change-Id: I14fb75c7c330e2415fd7a877186f7ed41a1accce
    Reviewed-on: http://gerrit.cloudera.org:8080/13639
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    (cherry picked from commit fe7c1575946f8ace3eaaea8adf1d03caf4da3c34)
    Reviewed-on: http://gerrit.cloudera.org:8080/13650
    Tested-by: Grant Henke <gr...@apache.org>
    Reviewed-by: Will Berkeley <wd...@gmail.com>
---
 src/kudu/common/column_predicate-test.cc | 39 ++++++++++++++++++++++++--------
 src/kudu/common/column_predicate.cc      | 27 +++++++++++-----------
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc
index 946915a..335bf78 100644
--- a/src/kudu/common/column_predicate-test.cc
+++ b/src/kudu/common/column_predicate-test.cc
@@ -1510,9 +1510,6 @@ class ColumnPredicateBenchmark : public KuduTest {
    static constexpr auto kColType = TypeParam::physical_type;
    static constexpr int kNumRows = 1024;
 
-   ColumnPredicateBenchmark() {
-   }
-
    void DoTest(const std::function<ColumnPredicate(const ColumnSchema&)>& pred_factory) {
      const int num_iters = AllowSlowTests() ? 1000000 : 100;
      const int num_evals = num_iters * kNumRows;
@@ -1520,6 +1517,12 @@ class ColumnPredicateBenchmark : public KuduTest {
      for (bool nullable : {false, true}) {
        ColumnSchema cs("c", kColType, nullable);
        auto pred = pred_factory(cs);
+       if (pred.predicate_type() == PredicateType::None) {
+         // The predicate factory might return None in the case of
+         // NULL on a NOT NULL column.
+         continue;
+       }
+
        ScopedColumnBlock<kColType> b(kNumRows);
        for (int i = 0; i < kNumRows; i++) {
          b[i] = rand() % 3;
@@ -1549,6 +1552,9 @@ class ColumnPredicateBenchmark : public KuduTest {
    }
 };
 
+template<class T>
+class RangePredicateBenchmark : public ColumnPredicateBenchmark<T> {};
+
 using test_types = ::testing::Types<
   DataTypeTraits<INT8>,
   DataTypeTraits<INT16>,
@@ -1557,26 +1563,39 @@ using test_types = ::testing::Types<
   DataTypeTraits<FLOAT>,
   DataTypeTraits<DOUBLE>>;
 
-TYPED_TEST_CASE(ColumnPredicateBenchmark, test_types);
+TYPED_TEST_CASE(RangePredicateBenchmark, test_types);
 
-TYPED_TEST(ColumnPredicateBenchmark, TestEquals) {
+TYPED_TEST(RangePredicateBenchmark, TestEquals) {
   const typename TypeParam::cpp_type ref_val = 0;
-  ColumnPredicateBenchmark<TypeParam>::DoTest(
+  RangePredicateBenchmark<TypeParam>::DoTest(
       [&](const ColumnSchema& cs) { return ColumnPredicate::Equality(cs, &ref_val); });
 }
 
-TYPED_TEST(ColumnPredicateBenchmark, TestLessThan) {
+TYPED_TEST(RangePredicateBenchmark, TestLessThan) {
   const typename TypeParam::cpp_type upper = 0;
-  ColumnPredicateBenchmark<TypeParam>::DoTest(
+  RangePredicateBenchmark<TypeParam>::DoTest(
       [&](const ColumnSchema& cs) { return ColumnPredicate::Range(cs, nullptr, &upper); });
 }
 
-TYPED_TEST(ColumnPredicateBenchmark, TestRange) {
+TYPED_TEST(RangePredicateBenchmark, TestRange) {
   const typename TypeParam::cpp_type lower = 0;
   const typename TypeParam::cpp_type upper = 2;
-  ColumnPredicateBenchmark<TypeParam>::DoTest(
+  RangePredicateBenchmark<TypeParam>::DoTest(
       [&](const ColumnSchema& cs) { return ColumnPredicate::Range(cs, &lower, &upper); });
 }
 
+// IS NULL and IS NOT NULL predicates don't look at the data itself, so no need
+// to type-parameterize them.
+class NullPredicateBenchmark : public ColumnPredicateBenchmark<DataTypeTraits<INT32>> {};
+
+TEST_F(NullPredicateBenchmark, TestIsNull) {
+  NullPredicateBenchmark::DoTest(
+      [&](const ColumnSchema& cs) { return ColumnPredicate::IsNull(cs); });
+}
+TEST_F(NullPredicateBenchmark, TestIsNotNull) {
+  NullPredicateBenchmark::DoTest(
+      [&](const ColumnSchema& cs) { return ColumnPredicate::IsNotNull(cs); });
+}
+
 
 } // namespace kudu
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 3b665f1..bea1142 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -33,6 +33,7 @@
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/alignment.h"
 #include "kudu/util/bitmap.h"
 #include "kudu/util/logging.h"
 #include "kudu/util/memory/arena.h"
@@ -717,6 +718,16 @@ void ApplyPredicate(const ColumnBlock& block, SelectionVector* sel, P p) {
     }
   }
 }
+
+template<bool IS_NOT_NULL>
+void ApplyNullPredicate(const ColumnBlock& block, uint8_t* __restrict__ sel_vec) {
+  int n_bytes = KUDU_ALIGN_UP(block.nrows(), 8) / 8;
+  for (int i = 0; i < n_bytes; i++) {
+    uint8_t null_byte = block.null_bitmap()[i];
+    if (!IS_NOT_NULL) null_byte = ~null_byte;
+    sel_vec[i] &= null_byte;
+  }
+}
 } // anonymous namespace
 
 template <DataType PhysicalType>
@@ -748,13 +759,7 @@ void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
     };
     case PredicateType::IsNotNull: {
       if (!block.is_nullable()) return;
-      // TODO: make this more efficient by using bitwise operations on the
-      // null and selection vectors.
-      for (size_t i = 0; i < block.nrows(); i++) {
-        if (sel->IsRowSelected(i) && block.is_null(i)) {
-          BitmapClear(sel->mutable_bitmap(), i);
-        }
-      }
+      ApplyNullPredicate<true>(block, sel->mutable_bitmap());
       return;
     };
     case PredicateType::IsNull: {
@@ -762,13 +767,7 @@ void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
         BitmapChangeBits(sel->mutable_bitmap(), 0, block.nrows(), false);
         return;
       }
-      // TODO(wdberkeley): make this more efficient by using bitwise operations on the
-      // null and selection vectors.
-      for (size_t i = 0; i < block.nrows(); i++) {
-        if (sel->IsRowSelected(i) && !block.is_null(i)) {
-          BitmapClear(sel->mutable_bitmap(), i);
-        }
-      }
+      ApplyNullPredicate<false>(block, sel->mutable_bitmap());
       return;
     }
     case PredicateType::InList: {