You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/07/31 21:50:01 UTC

incubator-impala git commit: IMPALA-5336: Fix partition pruning when column is cast

Repository: incubator-impala
Updated Branches:
  refs/heads/master b2dbcbc2d -> 922ee7031


IMPALA-5336: Fix partition pruning when column is cast

Partition pruning has two mechanisms:
1) Simple predicates (e.g. binary predicates of the form
   <SlotRef> <op> <LiteralExpr>) can be used to derive lists
   of matching partition ids directly from the
   partition key values. This is handled directly in the FE
   and is very efficient for supported simple predicates.
2) General expr evaluation of predicates using the BE (via
   FeSupport). This works for all predicates, so it is the
   mechanism used for predicates not supported by (1).

The issue was that (1) was being used when a binary
predicate contained an implicit cast on the SlotRef. While
such a predicate is fine when evaluated by the BE, the simple
mechanism in (1) cannot match the partition key values
against the predicate literal because the partition key
values cannot be cast in the FE.

The fix is to force binary predicates involving a cast to be
evaluated in the BE.

Testing: A planner test was added to demonstrate that the
expected partition pruning occurs.

Some modifications were made to the functional schema table
stringpartitionkey, so it will be necessary to reload that
table:

load-data.py -w functional-query --table_names=stringpartitionkey

Change-Id: I94f597a6589f5e34d2b74abcd29be77c4161cd99
Reviewed-on: http://gerrit.cloudera.org:8080/7521
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/922ee703
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/922ee703
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/922ee703

Branch: refs/heads/master
Commit: 922ee70317ec21ddc7c12a50c6b46f39ec0e7f90
Parents: b2dbcbc
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Tue Jul 25 14:13:01 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Mon Jul 31 21:49:17 2017 +0000

----------------------------------------------------------------------
 .../impala/planner/HdfsPartitionPruner.java       |  1 +
 .../org/apache/impala/planner/PlannerTest.java    |  7 +++++++
 testdata/bin/compute-table-stats.sh               |  2 +-
 .../functional/functional_schema_template.sql     |  7 +++++++
 .../datasets/functional/schema_constraints.csv    |  1 +
 .../queries/PlannerTest/partition-pruning.test    | 18 ++++++++++++++++++
 6 files changed, 35 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
index 7c05283..7e7d852 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java
@@ -182,6 +182,7 @@ public class HdfsPartitionPruner {
         return false;
       }
       BinaryPredicate bp = (BinaryPredicate)expr;
+      if (bp.getChild(0).isImplicitCast()) return false;
       SlotRef slot = bp.getBoundSlot();
       if (slot == null) return false;
       Expr bindingExpr = bp.getSlotBinding(slot.getSlotId());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 3b199f3..8289ee8 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -456,6 +456,13 @@ public class PlannerTest extends PlannerTestBase {
   }
 
   @Test
+  public void testPartitionPruning() {
+    TQueryOptions options = defaultQueryOptions();
+    options.setExplain_level(TExplainLevel.EXTENDED);
+    runPlannerTestFile("partition-pruning", options);
+  }
+
+  @Test
   public void testComputeStatsDisableSpill() throws ImpalaException {
     TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB,
         System.getProperty("user.name"));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/bin/compute-table-stats.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index 54c71a2..f27972c 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -32,7 +32,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad
 # Run compute stats over as many of the tables used in the Planner tests as possible.
 ${COMPUTE_STATS_SCRIPT} --db_names=functional\
     --table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls,
-    alltypessmall,alltypestiny,jointbl,dimtbl"
+    alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey"
 
 # We cannot load HBase on s3 and isilon yet.
 if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/datasets/functional/functional_schema_template.sql
----------------------------------------------------------------------
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 6472226..ff7b00d 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -1247,6 +1247,13 @@ string_col string
 id int
 ---- ALTER
 ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (string_col = "partition1");
+ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (string_col = "2009-01-01 00:00:00");
+---- LOAD
+SET hive.exec.dynamic.partition.mode=nonstrict;
+SET hive.exec.dynamic.partition=true;
+INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} PARTITION(string_col)
+SELECT id, timestamp_col as string_col from functional.alltypestiny
+WHERE timestamp_col = "2009-01-01 00:00:00";
 ====
 ---- DATASET
 functional

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/datasets/functional/schema_constraints.csv
----------------------------------------------------------------------
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index bb3487f..a3566c4 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -11,6 +11,7 @@ table_name:hbasealltypeserror, constraint:restrict_to, table_format:hbase/none/n
 table_name:hbasealltypeserrornonulls, constraint:restrict_to, table_format:hbase/none/none
 
 table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none
+table_name:stringpartitionkey, constraint:restrict_to, table_format:text/none/none
 table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none
 table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none
 table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
new file mode 100644
index 0000000..972ae6e
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test
@@ -0,0 +1,18 @@
+# IMPALA-5336: Test partition pruning when the string partition col is implicitly cast
+# to TIMESTAMP
+select * from functional.stringpartitionkey
+where string_col=cast("2009-01-01 00:00:00" as timestamp);
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+00:SCAN HDFS [functional.stringpartitionkey]
+   partitions=1/2 files=1 size=2B
+   stats-rows=1 extrapolated-rows=disabled
+   table stats: rows=1 size=2B
+   column stats: all
+   mem-estimate=32.00MB mem-reservation=0B
+   tuple-ids=0 row-size=20B cardinality=1
+====