You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by kw...@apache.org on 2016/12/06 19:07:30 UTC

[2/8] incubator-impala git commit: IMPALA-4571: Push IN predicates to Kudu

IMPALA-4571: Push IN predicates to Kudu

Fixes the KuduScanNode to convert InPredicates to
KuduPredicates and push them to the Kudu scan if possible.

An InPredicate can be pushed to the scan if the expression is of
the exact form:
   <SlotRef> IN (<LiteralExpr>, <LiteralExpr>, ...)

That means the InPredicate has the following properties:
1) It has a list of literal values (i.e. not a subquery);
   All values are LiteralExprs (not SlotRefs).
2) It is not negated, i.e. only 'IN' is supported, not 'NOT IN'
3) The SlotRef is not wrapped in any casts
4) The types of all values match the type of the SlotRef
   exactly.

A planner test was added exercising all supported types as
well as exprs where the values would not be supported.

TODO: perf testing
TODO: consider a limit on the number of list values before
      keeping the predicate on the Impala scan node
      (determine from testing)

Change-Id: I8988d4819d20d467b48e286917e347ca00f60cf0
Reviewed-on: http://gerrit.cloudera.org:8080/5316
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9f387c85
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9f387c85
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9f387c85

Branch: refs/heads/master
Commit: 9f387c858354a5c7df5fd922e731f887aa7e51f7
Parents: 867b243
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Thu Dec 1 18:16:33 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Dec 6 03:24:39 2016 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/KuduScanNode.java | 63 +++++++++++++++++++-
 .../queries/PlannerTest/kudu-selectivity.test   | 33 ++++++++++
 .../queries/PlannerTest/tpch-kudu.test          | 10 ++--
 3 files changed, 99 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f387c85/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
index 883a556..04fb244 100644
--- a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
@@ -26,6 +26,7 @@ import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.BinaryPredicate;
 import org.apache.impala.analysis.BoolLiteral;
 import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.InPredicate;
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.analysis.NullLiteral;
 import org.apache.impala.analysis.NumericLiteral;
@@ -274,7 +275,11 @@ public class KuduScanNode extends ScanNode {
       KuduClient client, org.apache.kudu.client.KuduTable rpcTable) {
     ListIterator<Expr> it = conjuncts_.listIterator();
     while (it.hasNext()) {
-      if (tryConvertKuduPredicate(analyzer, rpcTable, it.next())) it.remove();
+      Expr predicate = it.next();
+      if (tryConvertBinaryKuduPredicate(analyzer, rpcTable, predicate) ||
+          tryConvertInListKuduPredicate(analyzer, rpcTable, predicate)) {
+        it.remove();
+      }
     }
   }
 
@@ -282,7 +287,7 @@ public class KuduScanNode extends ScanNode {
    * If 'expr' can be converted to a KuduPredicate, returns true and updates
    * kuduPredicates_ and kuduConjuncts_.
    */
-  private boolean tryConvertKuduPredicate(Analyzer analyzer,
+  private boolean tryConvertBinaryKuduPredicate(Analyzer analyzer,
       org.apache.kudu.client.KuduTable table, Expr expr) {
     if (!(expr instanceof BinaryPredicate)) return false;
     BinaryPredicate predicate = (BinaryPredicate) expr;
@@ -347,6 +352,60 @@ public class KuduScanNode extends ScanNode {
   }
 
   /**
+   * If 'expr' has the exact form <SlotRef> IN (<LiteralExpr>, ...), converts it to a
+   * KuduPredicate, adds it to kuduPredicates_ and kuduConjuncts_, and returns true.
+   */
+  private boolean tryConvertInListKuduPredicate(Analyzer analyzer,
+      org.apache.kudu.client.KuduTable table, Expr expr) {
+    if (!(expr instanceof InPredicate)) return false;
+    InPredicate predicate = (InPredicate) expr;
+
+    // Only convert IN predicates, i.e. cannot convert NOT IN.
+    if (predicate.isNotIn()) return false;
+
+    // Child 0 must be a bare SlotRef, e.g. not a SlotRef wrapped in an implicit cast.
+    if (!(predicate.getChild(0) instanceof SlotRef)) return false;
+    SlotRef ref = (SlotRef) predicate.getChild(0);
+
+    // KuduPredicate takes a list of values as Objects. Give up before touching any
+    // state on a non-literal or a NullLiteral, which getKuduInListValue() cannot convert.
+    List<Object> values = Lists.newArrayList();
+    for (int i = 1; i < predicate.getChildren().size(); ++i) {
+      Expr child = predicate.getChild(i);
+      if (!child.isLiteral() || child instanceof NullLiteral) return false;
+      values.add(Preconditions.checkNotNull(getKuduInListValue((LiteralExpr) child)));
+    }
+
+    String colName = ref.getDesc().getColumn().getName();
+    ColumnSchema column = table.getSchema().getColumn(colName);
+    kuduPredicates_.add(KuduPredicate.newInListPredicate(column, values));
+    kuduConjuncts_.add(predicate);
+    return true;
+  }
+
+  /**
+   * Returns the value of the InList child literal 'e' as an Object that can be added
+   * to a KuduPredicate, chosen by the primitive type of 'e'. Throws an
+   * IllegalStateException if that type is not one of the types handled below.
+   */
+  private static Object getKuduInListValue(LiteralExpr e) {
+    switch (e.getType().getPrimitiveType()) {
+      // The narrowing casts make the value box as Byte/Short/Integer/Float.
+      case BOOLEAN: return ((BoolLiteral) e).getValue();
+      case TINYINT: return (byte) ((NumericLiteral) e).getLongValue();
+      case SMALLINT: return (short) ((NumericLiteral) e).getLongValue();
+      case INT: return (int) ((NumericLiteral) e).getLongValue();
+      case BIGINT: return ((NumericLiteral) e).getLongValue();
+      case FLOAT: return (float) ((NumericLiteral) e).getDoubleValue();
+      case DOUBLE: return ((NumericLiteral) e).getDoubleValue();
+      case STRING: return ((StringLiteral) e).getValue();
+      default:
+        throw new IllegalStateException(String.format(
+            "Unsupported Kudu type considered for predicate: %s", e.getType().toSql()));
+    }
+  }
+
+  /**
    * Returns a Kudu comparison operator for the BinaryPredicate operator, or null if
    * the operation is not supported by Kudu.
    */

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f387c85/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index a187ed8..eba7741 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -97,3 +97,36 @@ F00:PLAN FRAGMENT [RANDOM]
      hosts=3 per-host-mem=0B
      tuple-ids=0 row-size=124B cardinality=2
 ====
+select * from functional_kudu.alltypes where
+-- predicates that can be pushed
+tinyint_col in (1, 2) and
+smallint_col in (false, 2) and
+int_col in (true, 2) and
+bigint_col in (1, 2) and
+bool_col in (true) and
+float_col in (0.0) and
+double_col in (0.0) and
+string_col in ("foo", cast("foo" as char(10))) and
+
+-- predicates that cannot be pushed because the SlotRef is wrapped in a cast
+tinyint_col in (1, 999) and
+smallint_col in (99999, 2) and
+int_col in (9999999999) and
+bigint_col in (9999999999999999999) and
+bool_col in (1) and
+float_col in (cast('NaN' as float)) and
+double_col in (cast('inf' as double)) and
+
+-- 'NOT IN' and lists containing slotrefs cannot be pushed
+string_col not in ("bar") and
+id in (int_col)
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
+  00:SCAN KUDU [functional_kudu.alltypes]
+     predicates: id IN (int_col), string_col NOT IN ('bar'), bigint_col IN (9999999999999999999), double_col IN (CAST('inf' AS DOUBLE)), float_col IN (CAST('NaN' AS FLOAT)), int_col IN (9999999999), smallint_col IN (99999, 2), tinyint_col IN (1, 999), bool_col IN (1)
+     kudu predicates: double_col IN (0.0), float_col IN (0.0), bigint_col IN (1, 2), int_col IN (1, 2), smallint_col IN (0, 2), string_col IN ('foo', 'foo       '), tinyint_col IN (1, 2), bool_col IN (TRUE)
+     hosts=3 per-host-mem=unavailable
+     tuple-ids=0 row-size=126B cardinality=4
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f387c85/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index a5f2511..950fb90 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -725,8 +725,8 @@ PLAN-ROOT SINK
 |  hash predicates: o_orderkey = l_orderkey
 |
 |--01:SCAN KUDU [tpch_kudu.lineitem]
-|     predicates: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate
-|     kudu predicates: l_receiptdate < '1995-01-01', l_receiptdate >= '1994-01-01'
+|     predicates: l_commitdate < l_receiptdate, l_shipdate < l_commitdate
+|     kudu predicates: l_shipmode IN ('MAIL', 'SHIP'), l_receiptdate < '1995-01-01', l_receiptdate >= '1994-01-01'
 |
 00:SCAN KUDU [tpch_kudu.orders]
 ====
@@ -917,7 +917,8 @@ PLAN-ROOT SINK
 |  hash predicates: ps_partkey = p_partkey
 |
 |--01:SCAN KUDU [tpch_kudu.part]
-|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
+|     predicates: p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
+|     kudu predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9)
 |
 00:SCAN KUDU [tpch_kudu.partsupp]
 ====
@@ -1080,8 +1081,7 @@ PLAN-ROOT SINK
 |     kudu predicates: p_size >= 1
 |
 00:SCAN KUDU [tpch_kudu.lineitem]
-   predicates: l_shipmode IN ('AIR', 'AIR REG')
-   kudu predicates: l_shipinstruct = 'DELIVER IN PERSON'
+   kudu predicates: l_shipmode IN ('AIR', 'AIR REG'), l_shipinstruct = 'DELIVER IN PERSON'
 ====
 # Q20 - Potential Part Promotion Query
 select