You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2016/08/05 20:29:22 UTC

drill git commit: DRILL-4825: Fix incorrect result issue caused by partition pruning when same table is queried multiple times with different filters in query.

Repository: drill
Updated Branches:
  refs/heads/master f476eb59f -> 18e16610a


DRILL-4825: Fix incorrect result issue caused by partition pruning when same table is queried multiple times with different filters in query.

1) Introduce DirPrunedEnumerableTableScan, which takes the file selection as part of its digest.
2) When directory-based pruning happens, create an instance of DirPrunedEnumerableTableScan.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/18e16610
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/18e16610
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/18e16610

Branch: refs/heads/master
Commit: 18e16610ab446e1f435db35dd6dce60c0be0d07a
Parents: f476eb5
Author: Jinfeng Ni <jn...@apache.org>
Authored: Thu Aug 4 17:54:30 2016 -0700
Committer: Jinfeng Ni <jn...@apache.org>
Committed: Fri Aug 5 12:00:36 2016 -0700

----------------------------------------------------------------------
 .../planner/FileSystemPartitionDescriptor.java  |  4 +-
 .../logical/DirPrunedEnumerableTableScan.java   | 82 ++++++++++++++++++++
 .../drill/exec/store/dfs/FileSelection.java     | 21 +++++
 .../org/apache/drill/TestPartitionFilter.java   | 45 +++++++++++
 4 files changed, 151 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/18e16610/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
index 370fb6e..7796212 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
@@ -40,6 +40,7 @@ import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.physical.base.FileGroupScan;
+import org.apache.drill.exec.planner.logical.DirPrunedEnumerableTableScan;
 import org.apache.drill.exec.planner.logical.DrillRel;
 import org.apache.drill.exec.planner.logical.DrillScanRel;
 import org.apache.drill.exec.planner.logical.DrillTable;
@@ -257,7 +258,8 @@ public class FileSystemPartitionDescriptor extends AbstractPartitionDescriptor {
             newFormatSelection));
     final RelOptTableImpl newOptTableImpl = RelOptTableImpl.create(t.getRelOptSchema(), t.getRowType(), newTable);
 
-    return EnumerableTableScan.create(oldScan.getCluster(), newOptTableImpl);
+    // Return a DirPrunedEnumerableTableScan so that the file selection becomes part of the TableScan node's digest.
+    return DirPrunedEnumerableTableScan.create(oldScan.getCluster(), newOptTableImpl, newFileSelection.toString());
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/drill/blob/18e16610/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DirPrunedEnumerableTableScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DirPrunedEnumerableTableScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DirPrunedEnumerableTableScan.java
new file mode 100644
index 0000000..af53a1f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DirPrunedEnumerableTableScan.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.planner.logical;
+
+import com.google.common.base.Supplier;
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.adapter.enumerable.EnumerableConvention;
+import org.apache.calcite.adapter.enumerable.EnumerableTableScan;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptTable;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.schema.Table;
+
+import java.util.List;
+
+/**
+ * An {@link EnumerableTableScan} that adds the file selection string to its explain terms, and
+ * therefore to its digest. When directory-based partition pruning is applied, the same table can
+ * end up with different file selections, and this extra term keeps those scans apart.
+ */
+public class DirPrunedEnumerableTableScan extends EnumerableTableScan {
+  private final String digestFromSelection;
+
+  /**
+   * @param cluster             cluster the scan belongs to
+   * @param traitSet            traits of the scan
+   * @param table               table being scanned
+   * @param elementType         element type handed to the parent constructor
+   * @param digestFromSelection file selection string reported as the "selection" explain term
+   */
+  public DirPrunedEnumerableTableScan(RelOptCluster cluster, RelTraitSet traitSet,
+      RelOptTable table, Class elementType, String digestFromSelection) {
+    super(cluster, traitSet, table, elementType);
+    this.digestFromSelection = digestFromSelection;
+  }
+
+  /** Copies this scan with the given traits, carrying the same selection string along. */
+  @Override
+  public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
+    final Class rowClass = EnumerableTableScan.deduceElementType(this.table.unwrap(Table.class));
+    return new DirPrunedEnumerableTableScan(getCluster(), traitSet, table, rowClass, digestFromSelection);
+  }
+
+  /** Creates a DirPrunedEnumerableTableScan for the given table and file selection string. */
+  public static EnumerableTableScan create(RelOptCluster cluster,
+      RelOptTable relOptTable, String digestFromSelection) {
+    final Table unwrapped = relOptTable.unwrap(Table.class);
+    final Class rowClass = EnumerableTableScan.deduceElementType(unwrapped);
+
+    // Collations come from the table statistics; with no underlying Table there are none.
+    final Supplier<List<RelCollation>> collations = new Supplier<List<RelCollation>>() {
+      public List<RelCollation> get() {
+        if (unwrapped == null) {
+          return ImmutableList.of();
+        }
+        return unwrapped.getStatistic().getCollations();
+      }
+    };
+
+    final RelTraitSet traits = cluster.traitSetOf(EnumerableConvention.INSTANCE)
+        .replaceIfs(RelCollationTraitDef.INSTANCE, collations);
+    return new DirPrunedEnumerableTableScan(cluster, traits, relOptTable, rowClass, digestFromSelection);
+  }
+
+  /** Appends the file selection string as the "selection" term after the parent's terms. */
+  @Override
+  public RelWriter explainTerms(RelWriter pw) {
+    final RelWriter writer = super.explainTerms(pw);
+    return writer.item("selection", this.digestFromSelection);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/18e16610/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index 8654749..8904c82 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -402,4 +402,25 @@ public class FileSelection {
     return cacheFileRoot;
   }
 
+  /**
+   * Renders the selection root followed by the selected files. FileSystemPartitionDescriptor
+   * passes this string to DirPrunedEnumerableTableScan.create as the selection digest.
+   *
+   * @return a string of the form {@code root=<selectionRoot>, files=[<file>,<file>,...]}
+   */
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder();
+    // Keep an explicit delimiter between the fields so the root path does not run into "files=".
+    sb.append("root=").append(this.selectionRoot).append(", files=[");
+
+    String delimiter = "";
+    for (final String file : this.files) {
+      sb.append(delimiter).append(file);
+      delimiter = ",";
+    }
+    sb.append("]");
+    return sb.toString();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/18e16610/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java b/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
index 48ecf21..b1d833b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
@@ -386,4 +386,49 @@ public class TestPartitionFilter extends PlanTestBase {
     test("alter session set `planner.in_subquery_threshold` = 10");
     testExcludeFilter(query, 4, "Filter", 40);
   }
+
+
+  /**
+   * DRILL-4825: the same table is read by both branches of a UNION ALL, each branch with a
+   * different dir0 filter. The plan must not contain a Filter and the total count must be 120.
+   */
+  @Test
+  public void testPruneSameTableInUnionAll() throws Exception {
+    final String[] excludedPatterns = new String[] {"Filter"};
+    final String sql = String.format("select count(*) as cnt from "
+        + "( select dir0 from dfs_test.`%s/multilevel/parquet` where dir0 in ('1994') union all "
+        + "  select dir0 from dfs_test.`%s/multilevel/parquet` where dir0 in ('1995', '1996') )",
+        TEST_RES_PATH, TEST_RES_PATH);
+
+    // Plan check: "Filter" is passed as an excluded pattern, i.e. pruning absorbed the filter.
+    testPlanMatchingPatterns(sql, null, excludedPatterns);
+
+    // Result check: count(*) over both pruned branches.
+    testBuilder()
+        .sqlQuery(sql)
+        .unOrdered()
+        .baselineColumns("cnt")
+        .baselineValues(120L)
+        .build()
+        .run();
+  }
+
+  /**
+   * DRILL-4825: the same table is read on both sides of a join, each side with a different
+   * dir0 filter. The plan must not contain a Filter and the join must return no rows.
+   */
+  @Test
+  public void testPruneSameTableInJoin() throws Exception {
+    final String[] excludedPatterns = new String[] {"Filter"};
+    final String sql = String.format("select *  from "
+            + "( select sum(o_custkey) as x from dfs_test.`%s/multilevel/parquet` where dir0 in ('1994') ) join "
+            + " ( select sum(o_custkey) as y from dfs_test.`%s/multilevel/parquet` where dir0 in ('1995', '1996')) "
+            + " on x = y ",
+        TEST_RES_PATH, TEST_RES_PATH);
+
+    // Plan check: "Filter" is passed as an excluded pattern, i.e. pruning absorbed the filter.
+    testPlanMatchingPatterns(sql, null, excludedPatterns);
+
+    // Result check: the query is expected to produce an empty result set.
+    testBuilder()
+        .sqlQuery(sql)
+        .expectsEmptyResultSet()
+        .build()
+        .run();
+  }
+
 }