You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/06/14 17:32:33 UTC
[kudu] 01/03: KUDU-2846 (part 2): optimize IS [NOT] NULL predicates
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch branch-1.10.x
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit bd66430d80f7f400567b178af73c7688cea50cb8
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Thu Jun 13 11:19:36 2019 -0700
KUDU-2846 (part 2): optimize IS [NOT] NULL predicates
This does a simple optimization to use bitwise ops on the selection and
null vectors to implement NOT NULL and NULL, rather than the current
branchy code. The speedup is about 75x.
Before:
int32 NULL (c IS NULL) 615.9M evals/sec 4.54 cycles/eval
int32 NOT NULL (c IS NOT NULL) 636.3M evals/sec 4.39 cycles/eval
int32 NULL (c IS NOT NULL) 645.3M evals/sec 4.33 cycles/eval
After:
int32 NULL (c IS NULL) 46811.4M evals/sec 0.03 cycles/eval
int32 NOT NULL (c IS NOT NULL) 45706.1M evals/sec 0.03 cycles/eval
int32 NULL (c IS NOT NULL) 48855.0M evals/sec 0.03 cycles/eval
Change-Id: I14fb75c7c330e2415fd7a877186f7ed41a1accce
Reviewed-on: http://gerrit.cloudera.org:8080/13639
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <aw...@cloudera.com>
(cherry picked from commit fe7c1575946f8ace3eaaea8adf1d03caf4da3c34)
Reviewed-on: http://gerrit.cloudera.org:8080/13650
Tested-by: Grant Henke <gr...@apache.org>
Reviewed-by: Will Berkeley <wd...@gmail.com>
---
src/kudu/common/column_predicate-test.cc | 39 ++++++++++++++++++++++++--------
src/kudu/common/column_predicate.cc | 27 +++++++++++-----------
2 files changed, 42 insertions(+), 24 deletions(-)
diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc
index 946915a..335bf78 100644
--- a/src/kudu/common/column_predicate-test.cc
+++ b/src/kudu/common/column_predicate-test.cc
@@ -1510,9 +1510,6 @@ class ColumnPredicateBenchmark : public KuduTest {
static constexpr auto kColType = TypeParam::physical_type;
static constexpr int kNumRows = 1024;
- ColumnPredicateBenchmark() {
- }
-
void DoTest(const std::function<ColumnPredicate(const ColumnSchema&)>& pred_factory) {
const int num_iters = AllowSlowTests() ? 1000000 : 100;
const int num_evals = num_iters * kNumRows;
@@ -1520,6 +1517,12 @@ class ColumnPredicateBenchmark : public KuduTest {
for (bool nullable : {false, true}) {
ColumnSchema cs("c", kColType, nullable);
auto pred = pred_factory(cs);
+ if (pred.predicate_type() == PredicateType::None) {
+ // The predicate factory might return None in the case of
+ // NULL on a NOT NULL column.
+ continue;
+ }
+
ScopedColumnBlock<kColType> b(kNumRows);
for (int i = 0; i < kNumRows; i++) {
b[i] = rand() % 3;
@@ -1549,6 +1552,9 @@ class ColumnPredicateBenchmark : public KuduTest {
}
};
+template<class T>
+class RangePredicateBenchmark : public ColumnPredicateBenchmark<T> {};
+
using test_types = ::testing::Types<
DataTypeTraits<INT8>,
DataTypeTraits<INT16>,
@@ -1557,26 +1563,39 @@ using test_types = ::testing::Types<
DataTypeTraits<FLOAT>,
DataTypeTraits<DOUBLE>>;
-TYPED_TEST_CASE(ColumnPredicateBenchmark, test_types);
+TYPED_TEST_CASE(RangePredicateBenchmark, test_types);
-TYPED_TEST(ColumnPredicateBenchmark, TestEquals) {
+TYPED_TEST(RangePredicateBenchmark, TestEquals) {
const typename TypeParam::cpp_type ref_val = 0;
- ColumnPredicateBenchmark<TypeParam>::DoTest(
+ RangePredicateBenchmark<TypeParam>::DoTest(
[&](const ColumnSchema& cs) { return ColumnPredicate::Equality(cs, &ref_val); });
}
-TYPED_TEST(ColumnPredicateBenchmark, TestLessThan) {
+TYPED_TEST(RangePredicateBenchmark, TestLessThan) {
const typename TypeParam::cpp_type upper = 0;
- ColumnPredicateBenchmark<TypeParam>::DoTest(
+ RangePredicateBenchmark<TypeParam>::DoTest(
[&](const ColumnSchema& cs) { return ColumnPredicate::Range(cs, nullptr, &upper); });
}
-TYPED_TEST(ColumnPredicateBenchmark, TestRange) {
+TYPED_TEST(RangePredicateBenchmark, TestRange) {
const typename TypeParam::cpp_type lower = 0;
const typename TypeParam::cpp_type upper = 2;
- ColumnPredicateBenchmark<TypeParam>::DoTest(
+ RangePredicateBenchmark<TypeParam>::DoTest(
[&](const ColumnSchema& cs) { return ColumnPredicate::Range(cs, &lower, &upper); });
}
+// IS NULL and IS NOT NULL predicates don't look at the data itself, so no need
+// to type-parameterize them.
+class NullPredicateBenchmark : public ColumnPredicateBenchmark<DataTypeTraits<INT32>> {};
+
+TEST_F(NullPredicateBenchmark, TestIsNull) {
+ NullPredicateBenchmark::DoTest(
+ [&](const ColumnSchema& cs) { return ColumnPredicate::IsNull(cs); });
+}
+TEST_F(NullPredicateBenchmark, TestIsNotNull) {
+ NullPredicateBenchmark::DoTest(
+ [&](const ColumnSchema& cs) { return ColumnPredicate::IsNotNull(cs); });
+}
+
} // namespace kudu
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 3b665f1..bea1142 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -33,6 +33,7 @@
#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/join.h"
#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/alignment.h"
#include "kudu/util/bitmap.h"
#include "kudu/util/logging.h"
#include "kudu/util/memory/arena.h"
@@ -717,6 +718,16 @@ void ApplyPredicate(const ColumnBlock& block, SelectionVector* sel, P p) {
}
}
}
+
+template<bool IS_NOT_NULL>
+void ApplyNullPredicate(const ColumnBlock& block, uint8_t* __restrict__ sel_vec) {
+ int n_bytes = KUDU_ALIGN_UP(block.nrows(), 8) / 8;
+ for (int i = 0; i < n_bytes; i++) {
+ uint8_t null_byte = block.null_bitmap()[i];
+ if (!IS_NOT_NULL) null_byte = ~null_byte;
+ sel_vec[i] &= null_byte;
+ }
+}
} // anonymous namespace
template <DataType PhysicalType>
@@ -748,13 +759,7 @@ void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
};
case PredicateType::IsNotNull: {
if (!block.is_nullable()) return;
- // TODO: make this more efficient by using bitwise operations on the
- // null and selection vectors.
- for (size_t i = 0; i < block.nrows(); i++) {
- if (sel->IsRowSelected(i) && block.is_null(i)) {
- BitmapClear(sel->mutable_bitmap(), i);
- }
- }
+ ApplyNullPredicate<true>(block, sel->mutable_bitmap());
return;
};
case PredicateType::IsNull: {
@@ -762,13 +767,7 @@ void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
BitmapChangeBits(sel->mutable_bitmap(), 0, block.nrows(), false);
return;
}
- // TODO(wdberkeley): make this more efficient by using bitwise operations on the
- // null and selection vectors.
- for (size_t i = 0; i < block.nrows(); i++) {
- if (sel->IsRowSelected(i) && !block.is_null(i)) {
- BitmapClear(sel->mutable_bitmap(), i);
- }
- }
+ ApplyNullPredicate<false>(block, sel->mutable_bitmap());
return;
}
case PredicateType::InList: {