You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2016/10/03 21:50:41 UTC

[7/8] kudu git commit: KUDU-1652: Partition pruning fails with IS NOT NULL predicate on PK column

KUDU-1652: Partition pruning fails with IS NOT NULL predicate on PK column

Scan optimization could also fail in certain cases, but the fix is the
same.

Change-Id: Icba2defeffff74e86b0e266e668974bce0ad9b0e
Reviewed-on: http://gerrit.cloudera.org:8080/4541
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Jean-Daniel Cryans <jd...@apache.org>
(cherry picked from commit ce17a9c4eb34dcfe63c8d4321d38d18a0cb8c5c2)
Reviewed-on: http://gerrit.cloudera.org:8080/4607
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/77769ad5
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/77769ad5
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/77769ad5

Branch: refs/heads/branch-1.0.x
Commit: 77769ad557ebe41633ca589d8d4e5e39494bf1cc
Parents: 298aff7
Author: Dan Burkert <da...@cloudera.com>
Authored: Fri Sep 23 16:12:01 2016 -0700
Committer: Dan Burkert <da...@cloudera.com>
Committed: Mon Oct 3 21:45:31 2016 +0000

----------------------------------------------------------------------
 .../org/apache/kudu/client/KuduPredicate.java   | 11 ++--
 .../org/apache/kudu/client/PartitionPruner.java | 63 ++++++++++---------
 .../org/apache/kudu/client/TestKuduClient.java  |  6 ++
 src/kudu/common/column_predicate.cc             |  1 -
 src/kudu/common/key_util.cc                     | 65 ++++++++++++--------
 src/kudu/common/partition_pruner-test.cc        |  3 +
 src/kudu/common/scan_spec-test.cc               | 11 ++++
 7 files changed, 99 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java
index 76c7594..03a6004 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduPredicate.java
@@ -22,6 +22,7 @@ import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import com.google.common.primitives.UnsignedBytes;
 import com.google.protobuf.ByteString;
+
 import org.apache.kudu.ColumnSchema;
 import org.apache.kudu.Common;
 import org.apache.kudu.Schema;
@@ -467,18 +468,19 @@ public class KuduPredicate {
   public static KuduPredicate fromPB(Schema schema, Common.ColumnPredicatePB pb) {
     ColumnSchema column = schema.getColumn(pb.getColumn());
     switch (pb.getPredicateCase()) {
-      case EQUALITY: {
+      case EQUALITY:
         return new KuduPredicate(PredicateType.EQUALITY, column,
                                  pb.getEquality().getValue().toByteArray(), null);
-
-      }
       case RANGE: {
         Common.ColumnPredicatePB.Range range = pb.getRange();
         return new KuduPredicate(PredicateType.RANGE, column,
                                  range.hasLower() ? range.getLower().toByteArray() : null,
                                  range.hasUpper() ? range.getUpper().toByteArray() : null);
       }
-      default: throw new IllegalArgumentException("unknown predicate type");
+      case IS_NOT_NULL:
+        return newIsNotNullPredicate(column);
+      default:
+        throw new IllegalArgumentException("unknown predicate type");
     }
   }
 
@@ -614,7 +616,6 @@ public class KuduPredicate {
     }
   }
 
-
   /**
    * Checks that the column is one of the expected types.
    * @param column the column being checked

http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java b/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java
index 4545767..50f78a2 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/PartitionPruner.java
@@ -359,21 +359,24 @@ public class PartitionPruner {
 
     // Copy predicates into the row in range partition key column order,
     // stopping after the first missing predicate.
-    for (int idx : rangePartitionColumnIdxs) {
+    loop: for (int idx : rangePartitionColumnIdxs) {
       ColumnSchema column = schema.getColumnByIndex(idx);
       KuduPredicate predicate = predicates.get(column.getName());
       if (predicate == null) break;
 
-      if (predicate.getType() != KuduPredicate.PredicateType.EQUALITY &&
-          predicate.getType() != KuduPredicate.PredicateType.RANGE) {
-        throw new IllegalArgumentException(
-            String.format("unexpected predicate type can not be pushed into key: %s", predicate));
+      switch (predicate.getType()) {
+        case RANGE:
+          if (predicate.getLower() == null) break loop;
+          // fall through
+        case EQUALITY:
+          row.setRaw(idx, predicate.getLower());
+          pushedPredicates++;
+          break;
+        case IS_NOT_NULL: break loop;
+        default:
+          throw new IllegalArgumentException(
+              String.format("unexpected predicate type can not be pushed into key: %s", predicate));
       }
-
-      byte[] value = predicate.getLower();
-      if (value == null) break;
-      row.setRaw(idx, value);
-      pushedPredicates++;
     }
 
     // If no predicates were pushed, no need to do any more work.
@@ -406,32 +409,34 @@ public class PartitionPruner {
 
     // Step 1: copy predicates into the row in range partition key column order, stopping after
     // the first missing predicate.
-    for (int idx : rangePartitionColumnIdxs) {
+    loop: for (int idx : rangePartitionColumnIdxs) {
       ColumnSchema column = schema.getColumnByIndex(idx);
       KuduPredicate predicate = predicates.get(column.getName());
       if (predicate == null) break;
 
-      if (predicate.getType() == KuduPredicate.PredicateType.EQUALITY) {
-        byte[] value = predicate.getLower();
-        row.setRaw(idx, value);
-        pushedPredicates++;
-        finalPredicate = predicate;
-      } else if (predicate.getType() == KuduPredicate.PredicateType.RANGE) {
-
-        if (predicate.getUpper() != null) {
-          row.setRaw(idx, predicate.getUpper());
+      switch (predicate.getType()) {
+        case EQUALITY:
+          row.setRaw(idx, predicate.getLower());
           pushedPredicates++;
           finalPredicate = predicate;
-        }
+          break;
+        case RANGE:
+          if (predicate.getUpper() != null) {
+            row.setRaw(idx, predicate.getUpper());
+            pushedPredicates++;
+            finalPredicate = predicate;
+          }
 
-        // After the first column with a range constraint we stop pushing
-        // constraints into the upper bound. Instead, we push minimum values
-        // to the remaining columns (below), which is the maximally tight
-        // constraint.
-        break;
-      } else {
-        throw new IllegalArgumentException(
-            String.format("unexpected predicate type can not be pushed into key: %s", predicate));
+          // After the first column with a range constraint we stop pushing
+          // constraints into the upper bound. Instead, we push minimum values
+          // to the remaining columns (below), which is the maximally tight
+          // constraint.
+          break loop;
+        case IS_NOT_NULL:
+          break loop;
+        default:
+          throw new IllegalArgumentException(
+              String.format("unexpected predicate type can not be pushed into key: %s", predicate));
       }
     }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
index 0ae3727..e59ea8e 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduClient.java
@@ -331,6 +331,12 @@ public class TestKuduClient extends BaseKuduTest {
         KuduPredicate.newComparisonPredicate(schema.getColumn("c2"), GREATER, "c2_30"),
         KuduPredicate.newComparisonPredicate(schema.getColumn("c2"), LESS, "c2_20")
     ).size());
+
+    // IS NOT NULL
+    assertEquals(100, scanTableToStrings(table,
+       KuduPredicate.newIsNotNullPredicate(schema.getColumn("c2")),
+       KuduPredicate.newIsNotNullPredicate(schema.getColumn("key"))
+    ).size());
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/src/kudu/common/column_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 5dfedbe..229dcd6 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -108,7 +108,6 @@ ColumnPredicate ColumnPredicate::ExclusiveRange(ColumnSchema column,
 }
 
 ColumnPredicate ColumnPredicate::IsNotNull(ColumnSchema column) {
-  CHECK(column.is_nullable());
   return ColumnPredicate(PredicateType::IsNotNull, move(column), nullptr, nullptr);
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/src/kudu/common/key_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util.cc b/src/kudu/common/key_util.cc
index f62caba..357744e 100644
--- a/src/kudu/common/key_util.cc
+++ b/src/kudu/common/key_util.cc
@@ -146,28 +146,35 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
   // Step 1: copy predicates into the row in key column order, stopping after
   // the first range predicate.
 
-  for (auto col_idx_it = first; col_idx_it < last; std::advance(col_idx_it, 1)) {
+  bool break_loop = false;
+  for (auto col_idx_it = first; !break_loop && col_idx_it < last; std::advance(col_idx_it, 1)) {
     const ColumnSchema& column = schema.column(*col_idx_it);
     const ColumnPredicate* predicate = FindOrNull(predicates, column.name());
     if (predicate == nullptr) break;
     size_t size = column.type_info()->size();
-    if (predicate->predicate_type() == PredicateType::Equality) {
-      memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_lower(), size);
-      pushed_predicates++;
-      final_predicate = predicate;
-    } else if (predicate->predicate_type() == PredicateType::Range) {
-      if (predicate->raw_upper() != nullptr) {
-        memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_upper(), size);
+    switch (predicate->predicate_type()) {
+      case PredicateType::Equality:
+        memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_lower(), size);
         pushed_predicates++;
         final_predicate = predicate;
-      }
-      // After the first column with a range constraint we stop pushing
-      // constraints into the upper bound. Instead, we push minimum values
-      // to the remaining columns (below), which is the maximally tight
-      // constraint.
-      break;
-    } else {
-      LOG(FATAL) << "unexpected predicate type can not be pushed into key";
+        break;
+      case PredicateType::Range:
+        if (predicate->raw_upper() != nullptr) {
+          memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_upper(), size);
+          pushed_predicates++;
+          final_predicate = predicate;
+        }
+        // After the first column with a range constraint we stop pushing
+        // constraints into the upper bound. Instead, we push minimum values
+        // to the remaining columns (below), which is the maximally tight
+        // constraint.
+        break_loop = true;
+        break;
+      case PredicateType::IsNotNull:
+        break_loop = true;
+        break;
+      case PredicateType::None:
+        LOG(FATAL) << "NONE predicate can not be pushed into key";
     }
   }
 
@@ -200,23 +207,29 @@ int PushLowerBoundKeyPredicates(ColIdxIter first,
   // Step 1: copy predicates into the row in key column order, stopping after
   // the first missing predicate.
 
-  for (auto col_idx_it = first; col_idx_it < last; std::advance(col_idx_it, 1)) {
+  bool break_loop = false;
+  for (auto col_idx_it = first; !break_loop && col_idx_it < last; std::advance(col_idx_it, 1)) {
     const ColumnSchema& column = schema.column(*col_idx_it);
     const ColumnPredicate* predicate = FindOrNull(predicates, column.name());
     if (predicate == nullptr) break;
     size_t size = column.type_info()->size();
-    if (predicate->predicate_type() == PredicateType::Equality) {
-      memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_lower(), size);
-      pushed_predicates++;
-    } else if (predicate->predicate_type() == PredicateType::Range) {
-      if (predicate->raw_lower() != nullptr) {
+
+    switch (predicate->predicate_type()) {
+      case PredicateType::Range:
+        if (predicate->raw_lower() == nullptr) {
+          break_loop = true;
+          break;
+        }
+        // Fall through.
+      case PredicateType::Equality:
         memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_lower(), size);
         pushed_predicates++;
-      } else {
         break;
-      }
-    } else {
-      LOG(FATAL) << "unexpected predicate type can not be pushed into key";
+      case PredicateType::IsNotNull:
+        break_loop = true;
+        break;
+      case PredicateType::None:
+        LOG(FATAL) << "NONE predicate can not be pushed into key";
     }
   }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/src/kudu/common/partition_pruner-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/partition_pruner-test.cc b/src/kudu/common/partition_pruner-test.cc
index 889ebaa..9c85420 100644
--- a/src/kudu/common/partition_pruner-test.cc
+++ b/src/kudu/common/partition_pruner-test.cc
@@ -433,6 +433,9 @@ TEST(TestPartitionPruner, TestRangePruning) {
   Check({ ColumnPredicate::Equality(schema.column(2), &zero),
           ColumnPredicate::Range(schema.column(1), nullptr, &m0) },
         2);
+
+  // c IS NOT NULL
+  Check({ ColumnPredicate::IsNotNull(schema.column(2)) }, 3);
 }
 
 TEST(TestPartitionPruner, TestHashPruning) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/77769ad5/src/kudu/common/scan_spec-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/scan_spec-test.cc b/src/kudu/common/scan_spec-test.cc
index 428d9cd..1bfa404 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -309,6 +309,17 @@ TEST_F(CompositeIntKeysTest, TestPredicateOrderDoesntMatter) {
             spec.ToString(schema_));
 }
 
+// Test that IS NOT NULL predicates do *not* get pushed into the primary key
+// bounds. This is a regression test for KUDU-1652, where previously attempting
+// to push an IS NOT NULL predicate would cause a CHECK failure.
+TEST_F(CompositeIntKeysTest, TestIsNotNullPushdown) {
+  ScanSpec spec;
+  spec.AddPredicate(ColumnPredicate::IsNotNull(schema_.column(0)));
+  SCOPED_TRACE(spec.ToString(schema_));
+  spec.OptimizeScan(schema_, &arena_, &pool_, true);
+  EXPECT_EQ("`a` IS NOT NULL", spec.ToString(schema_));
+}
+
 // Tests that a scan spec without primary key bounds will not have predicates
 // after optimization.
 TEST_F(CompositeIntKeysTest, TestLiftPrimaryKeyBounds_NoBounds) {