You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ab...@apache.org on 2019/12/09 22:34:08 UTC

[kudu] 01/03: Improve SIMD code generation for primitive predicates

This is an automated email from the ASF dual-hosted git repository.

abukor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit c9dd2b520a8b82500eb6a56961b9da0ccd2ed752
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Thu Dec 5 11:26:09 2019 -0800

    Improve SIMD code generation for primitive predicates
    
    This adds a local (on-stack) copy of the bounds for range and equality
    predicates before evaluating them against the columns. These on-stack
    copies help the compiler realize that the stores to the selection vector
    can't overwrite the predicate itself, and thus allow SIMD code
    generation.
    
    Benchmarked with column_predicate-test. The 'NOT NULL' results are
    highlighted below (since this change doesn't affect the evaluation of nulls):
    
    Before:
     int8   NOT NULL   (c >= 0 AND c < 2) 1363.5M evals/sec	2.09 cycles/eval
     int16  NOT NULL   (c >= 0 AND c < 2) 1238.3M evals/sec	2.30 cycles/eval
     int32  NOT NULL   (c >= 0 AND c < 2) 1321.3M evals/sec	2.15 cycles/eval
     int64  NOT NULL   (c >= 0 AND c < 2) 1408.3M evals/sec	2.02 cycles/eval
     float  NOT NULL   (c >= 0 AND c < 2) 1134.8M evals/sec	2.52 cycles/eval
     double NOT NULL   (c >= 0 AND c < 2) 1144.2M evals/sec	2.49 cycles/eval
    
    After:
     int8   NOT NULL   (c >= 0 AND c < 2) 3152.2M evals/sec	0.88 cycles/eval
     int16  NOT NULL   (c >= 0 AND c < 2) 3309.6M evals/sec	0.85 cycles/eval
     int32  NOT NULL   (c >= 0 AND c < 2) 3384.0M evals/sec	0.85 cycles/eval
     int64  NOT NULL   (c >= 0 AND c < 2) 1847.6M evals/sec	1.57 cycles/eval
     float  NOT NULL   (c >= 0 AND c < 2) 3268.3M evals/sec	0.88 cycles/eval
     double NOT NULL   (c >= 0 AND c < 2) 2245.2M evals/sec	1.27 cycles/eval
    
    The numbers for non-range predicates didn't seem to change here.
    
    Change-Id: I1772584c1d0c53128608ea26248dd4ab069b8108
    Reviewed-on: http://gerrit.cloudera.org:8080/14855
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/common/column_predicate.cc | 29 ++++++++++++++++++-----------
 src/kudu/common/column_predicate.h  |  1 +
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index bea1142..2c84e85 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -673,7 +673,7 @@ int ApplyPredicatePrimitive(const ColumnBlock& block, uint8_t* __restrict__ sel_
   const cpp_type* data = reinterpret_cast<const cpp_type*>(block.data());
   const int n_chunks = block.nrows() / 8;
   for (int i = 0; i < n_chunks; i++) {
-    uint8_t res_8 = 0;;
+    uint8_t res_8 = 0;
     for (int j = 0; j < 8; j++) {
       res_8 |= p(data++) << j;
     }
@@ -733,27 +733,34 @@ void ApplyNullPredicate(const ColumnBlock& block, uint8_t* __restrict__ sel_vec)
 template <DataType PhysicalType>
 void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
                                               SelectionVector* sel) const {
+  using traits = DataTypeTraits<PhysicalType>;
+  using cpp_type = typename traits::cpp_type;
+
   switch (predicate_type()) {
     case PredicateType::Range: {
+      cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) : cpp_type();
+      cpp_type local_upper = upper_ ? *static_cast<const cpp_type*>(upper_) : cpp_type();
+
       if (lower_ == nullptr) {
-        ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-          return DataTypeTraits<PhysicalType>::Compare(cell, this->upper_) < 0;
+        ApplyPredicate<PhysicalType>(block, sel, [local_upper] (const void* cell) {
+            return traits::Compare(cell, &local_upper) < 0;
         });
       } else if (upper_ == nullptr) {
-        ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-          return DataTypeTraits<PhysicalType>::Compare(cell, this->lower_) >= 0;
+        ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void* cell) {
+            return traits::Compare(cell, &local_lower) >= 0;
         });
       } else {
-        ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-          return DataTypeTraits<PhysicalType>::Compare(cell, this->upper_) < 0 &&
-                 DataTypeTraits<PhysicalType>::Compare(cell, this->lower_) >= 0;
+        ApplyPredicate<PhysicalType>(block, sel, [local_lower, local_upper] (const void* cell) {
+            return traits::Compare(cell, &local_upper) < 0 &&
+                   traits::Compare(cell, &local_lower) >= 0;
         });
       }
       return;
     };
     case PredicateType::Equality: {
-      ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
-        return DataTypeTraits<PhysicalType>::Compare(cell, this->lower_) == 0;
+      cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) : cpp_type();
+      ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void* cell) {
+            return traits::Compare(cell, &local_lower) == 0;
       });
       return;
     };
@@ -774,7 +781,7 @@ void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
       ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) {
         return std::binary_search(values_.begin(), values_.end(), cell,
                                   [] (const void* lhs, const void* rhs) {
-                                    return DataTypeTraits<PhysicalType>::Compare(lhs, rhs) < 0;
+                                    return traits::Compare(lhs, rhs) < 0;
                                   });
       });
       return;
diff --git a/src/kudu/common/column_predicate.h b/src/kudu/common/column_predicate.h
index 2527fe1..963a172 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -22,6 +22,7 @@
 #include <cstdint>
 #include <ostream>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include <boost/optional/optional.hpp>