You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2015/01/16 00:22:26 UTC

drill git commit: DRILL-1889: Fix star column prefix and subsume logic, so that query planner handle single table * query properly.

Repository: drill
Updated Branches:
  refs/heads/master 937802814 -> 5a280666e


DRILL-1889:  Fix star column prefix and subsume logic, so that query planner handle single table * query properly.

Code revision based on review comment.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/5a280666
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/5a280666
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/5a280666

Branch: refs/heads/master
Commit: 5a280666ecdb236b52de73f57a504bb521e3ea89
Parents: 9378028
Author: Jinfeng Ni <jn...@maprtech.com>
Authored: Mon Jan 12 21:16:47 2015 -0800
Committer: Jinfeng Ni <jn...@maprtech.com>
Committed: Thu Jan 15 15:19:58 2015 -0800

----------------------------------------------------------------------
 .../drill/exec/planner/StarColumnHelper.java    | 25 +++++--
 .../planner/common/DrillProjectRelBase.java     | 27 +++----
 .../physical/visitor/StarColumnConverter.java   | 75 ++++++++++++--------
 .../java/org/apache/drill/TestStarQueries.java  |  8 +++
 4 files changed, 86 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
index 9beef39..b9dfda3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
@@ -19,6 +19,7 @@
 package org.apache.drill.exec.planner;
 
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.eigenbase.reltype.RelDataType;
@@ -32,6 +33,10 @@ public class StarColumnHelper {
   public final static String PREFIXED_STAR_COLUMN = PREFIX_DELIMITER + STAR_COLUMN;
 
   public static boolean containsStarColumn(RelDataType type) {
+    if (! type.isStruct()) {
+      return false;
+    }
+
     List<String> fieldNames = type.getFieldNames();
 
     for (String s : fieldNames) {
@@ -55,6 +60,14 @@ public class StarColumnHelper {
     return isPrefixedStarColumn(fieldName) || isNonPrefixedStarColumn(fieldName);
   }
 
+  // Expression in some sense is similar to regular columns. Expression (i.e. C1 + C2 + 10) is not
+  // associated with an alias, the project will have (C1 + C2 + 10) --> f1, column "f1" could be
+  // viewed as a regular column, and does not require prefix. If user put an alias, then,
+  // the project will have (C1 + C2 + 10) -> alias.
+  public static boolean isRegularColumnOrExp(String fieldName) {
+    return ! isStarColumn(fieldName);
+  }
+
   public static String extractStarColumnPrefix(String fieldName) {
 
     assert (isPrefixedStarColumn(fieldName));
@@ -71,12 +84,14 @@ public class StarColumnHelper {
   }
 
   // Given a set of prefixes, check if a regular column is subsumed by any of the prefixed star column in the set.
-  public static boolean subsumeRegColumn(Set<String> prefixes, String fieldName) {
-    if (isPrefixedStarColumn(fieldName)) {
-      return false;  // only applies to regular column.
-    }
+  public static boolean subsumeColumn(Map<String, String> prefixMap, String fieldName) {
+    String prefix = extractColumnPrefix(fieldName);
 
-    return prefixes.contains(extractColumnPrefix(fieldName));
+    if (isRegularColumnOrExp(fieldName)) {
+      return false;  // regular column or expression is not subsumed by any star column.
+    } else {
+      return prefixMap.containsKey(prefix) && ! fieldName.equals(prefixMap.get(prefix)); // t1*0 is subsumed by t1*.
+    }
   }
 
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
index f440c29..7cf98cd 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.planner.common;
 
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 
@@ -71,27 +72,27 @@ public abstract class DrillProjectRelBase extends ProjectRelBase implements Dril
     return Pair.zip(exps, getRowType().getFieldNames());
   }
 
-  // By default, the project will not allow duplicate columns, caused by expanding from * column.
-  // For example, if we have T1_*, T1_Col1, T1_Col2, Col1 and Col2 will have two copies if we expand
-  // * into a list of regular columns.  For the intermediate project, the duplicate columns are not
-  // necessary; it will impact performance.
   protected List<NamedExpression> getProjectExpressions(DrillParseContext context) {
     List<NamedExpression> expressions = Lists.newArrayList();
 
-    HashSet<String> starColPrefixes = new HashSet<String>();
+    HashMap<String, String> starColPrefixes = new HashMap<String, String>();
 
-    // To remove duplicate columns caused by expanding from * column, we'll keep track of
-    // all the prefix in the project expressions. If a regular column C1 have the same prefix, that
-    // regular column is not included in the project expression, since at execution time, * will be
-    // expanded into a list of column, including column C1.
-    for (String fieldName : getRowType().getFieldNames()) {
-      if (StarColumnHelper.isPrefixedStarColumn(fieldName)) {
-        starColPrefixes.add(StarColumnHelper.extractStarColumnPrefix(fieldName));
+    // T1.* will subsume T1.*0, but will not subsume any regular column/expression.
+    // Select *, col1, *, col2 : the intermediate will output one set of regular columns expanded from star with prefix,
+    // plus col1 and col2 without prefix.
+    // This will allow us to differentiate the regular expanded from *, and the regular column referenced in the query.
+    for (Pair<RexNode, String> pair : projects()) {
+      if (StarColumnHelper.isPrefixedStarColumn(pair.right)) {
+        String prefix = StarColumnHelper.extractStarColumnPrefix(pair.right);
+
+        if (! starColPrefixes.containsKey(prefix)) {
+          starColPrefixes.put(prefix, pair.right);
+        }
       }
     }
 
     for (Pair<RexNode, String> pair : projects()) {
-      if (! StarColumnHelper.subsumeRegColumn(starColPrefixes, pair.right)) {
+      if (! StarColumnHelper.subsumeColumn(starColPrefixes, pair.right)) {
         LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left);
         expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right)));
       }

http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
index 159099d..1511e49 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
@@ -22,6 +22,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicLong;
 
+import com.google.common.base.Preconditions;
 import org.apache.drill.exec.planner.StarColumnHelper;
 import org.apache.drill.exec.planner.physical.JoinPrel;
 import org.apache.drill.exec.planner.physical.Prel;
@@ -45,18 +46,24 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
   private static final AtomicLong tableNumber = new AtomicLong(0);
 
   public static Prel insertRenameProject(Prel root, RelDataType origRowType) {
-    // Insert top project to do rename only when : 1) there is a join
-    // 2) there is a SCAN with * column.  We pass two boolean to keep track of
-    // these two conditions.
-    boolean [] renamedForStar = new boolean [2];
-    renamedForStar[0] = false;
-    renamedForStar[1] = false;
+    // Prefixing columns for columns expanded from star column :
+    // Insert one project under screen (PUS) to remove prefix, and one project above scan (PAS) to add prefix.
+    // PUS AND PAS are required, when
+    //   Any non-SCAN prel produces regular column / expression AND star column,
+    //   or multiple star columns.
+    //   This is because we have to use prefix to distinguish columns expanded
+    //   from star column, from those regular column referenced in the query.
+
+    // We use an array of boolean to keep track this condition.
+    boolean [] prefixedForStar = new boolean [1];
+    prefixedForStar[0] = false;
 
     //root should be screen / writer : no need to rename for the root.
+    Prel child = ((Prel) root.getInput(0)).accept(INSTANCE, prefixedForStar);
 
-    Prel child = ((Prel) root.getInput(0)).accept(INSTANCE, renamedForStar);
+    int fieldCount = root.getRowType().isStruct()? root.getRowType().getFieldCount():1;
 
-    if (renamedForStar[0] && renamedForStar[1]) {
+    if (prefixedForStar[0]) {
       List<RexNode> exprs = Lists.newArrayList();
       for (int i = 0; i < origRowType.getFieldCount(); i++) {
         RexNode expr = child.getCluster().getRexBuilder().makeInputRef(origRowType.getFieldList().get(i).getType(), i);
@@ -65,25 +72,33 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
 
       RelDataType newRowType = RexUtil.createStructType(child.getCluster().getTypeFactory(), exprs, origRowType.getFieldNames());
 
-      // Insert a top project which allows duplicate columns.
-      child = new ProjectAllowDupPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+      // Insert PUS : remove the prefix and keep the orignal field name.
+      if (fieldCount > 1) { // // no point in allowing duplicates if we only have one column
+        child = new ProjectAllowDupPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+      } else {
+        child = new ProjectPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+      }
 
       List<RelNode> children = Lists.newArrayList();
       children.add( child);
       return (Prel) root.copy(root.getTraitSet(), children);
 
-    }else{
+    } else {
       return root;
     }
 
   }
 
-
   @Override
-  public Prel visitPrel(Prel prel, boolean [] renamedForStar) throws RuntimeException {
+  public Prel visitPrel(Prel prel, boolean [] prefixedForStar) throws RuntimeException {
+    // there exists other expression, in addition to a star column: require prefixRename.
+    if (StarColumnHelper.containsStarColumn(prel.getRowType()) && prel.getRowType().getFieldNames().size() > 1) {
+      prefixedForStar[0] = true;
+    }
+
     List<RelNode> children = Lists.newArrayList();
     for (Prel child : prel) {
-      child = child.accept(this, renamedForStar);
+      child = child.accept(this, prefixedForStar);
       children.add(child);
     }
 
@@ -91,11 +106,14 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
     // when the project expression is RexInPutRef. This is necessary since Optiq may use
     // an arbitrary name for the project's field name.
     if (prel instanceof ProjectPrel) {
+
+      ProjectPrel proj = (ProjectPrel) prel;
+
       RelNode child = children.get(0);
 
       List<String> fieldNames = Lists.newArrayList();
 
-      for (Pair<String, RexNode> pair : Pair.zip(prel.getRowType().getFieldNames(), ((ProjectPrel) prel).getProjects())) {
+      for (Pair<String, RexNode> pair : Pair.zip(prel.getRowType().getFieldNames(), proj.getProjects())) {
         if (pair.right instanceof RexInputRef) {
           String name = child.getRowType().getFieldNames().get(((RexInputRef) pair.right).getIndex());
           fieldNames.add(name);
@@ -107,27 +125,17 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
       // Make sure the field names are unique : Optiq does not allow duplicate field names in a rowType.
       fieldNames = makeUniqueNames(fieldNames);
 
-      RelDataType rowType = RexUtil.createStructType(prel.getCluster().getTypeFactory(), ((ProjectPrel) prel).getProjects(), fieldNames);
+      RelDataType rowType = RexUtil.createStructType(prel.getCluster().getTypeFactory(), proj.getProjects(), fieldNames);
 
-      return (Prel) new ProjectPrel(prel.getCluster(), prel.getTraitSet(), children.get(0), ((ProjectPrel) prel).getProjects(), rowType);
+      return (Prel) proj.copy(proj.getTraitSet(),children.get(0), proj.getProjects(), rowType);
     } else {
       return (Prel) prel.copy(prel.getTraitSet(), children);
     }
   }
 
-
   @Override
-  public Prel visitJoin(JoinPrel prel, boolean [] renamedForStar) throws RuntimeException {
-    renamedForStar[0] = true;    // indicate there is a join, which may require top rename projet operator.
-    return visitPrel(prel, renamedForStar);
-  }
-
-
-  @Override
-  public Prel visitScan(ScanPrel scanPrel, boolean [] renamedForStar) throws RuntimeException {
-    if (StarColumnHelper.containsStarColumn(scanPrel.getRowType()) && renamedForStar[0] ) {
-
-      renamedForStar[1] = true;  // indicate there is * for a SCAN operator.
+  public Prel visitScan(ScanPrel scanPrel, boolean [] prefixedForStar) throws RuntimeException {
+    if (StarColumnHelper.containsStarColumn(scanPrel.getRowType()) && prefixedForStar[0] ) {
 
       List<RexNode> exprs = Lists.newArrayList();
 
@@ -141,15 +149,20 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
       long tableId = tableNumber.getAndIncrement();
 
       for (String name : scanPrel.getRowType().getFieldNames()) {
-        fieldNames.add("T" +  tableId + StarColumnHelper.PREFIX_DELIMITER + name);
+        if (StarColumnHelper.isNonPrefixedStarColumn(name)) {
+          fieldNames.add("T" +  tableId + StarColumnHelper.PREFIX_DELIMITER + name);  // Add prefix to * column.
+        } else {
+          fieldNames.add(name);  // Keep regular column as it is.
+        }
       }
       RelDataType rowType = RexUtil.createStructType(scanPrel.getCluster().getTypeFactory(), exprs, fieldNames);
 
+      // insert a PAS.
       ProjectPrel proj = new ProjectPrel(scanPrel.getCluster(), scanPrel.getTraitSet(), scanPrel, exprs, rowType);
 
       return proj;
     } else {
-      return visitPrel(scanPrel, renamedForStar);
+      return visitPrel(scanPrel, prefixedForStar);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
index 9ce2d8c..ee55c5c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
@@ -219,4 +219,12 @@ public class TestStarQueries extends BaseTestQuery{
         "order by cnt;" );
   }
 
+  @Test  // DRILL-1889
+  public void testStarWithOtherExpression() throws Exception {
+    test("select *  from cp.`tpch/nation.parquet` order by substr(n_name, 2, 5) limit 3");
+    test("select *, n_nationkey + 5 from cp.`tpch/nation.parquet` limit 3");
+    test("select *  from cp.`tpch/nation.parquet` where n_nationkey + 5 > 10 limit 3");
+    test("select * from cp.`tpch/nation.parquet` order by random()");
+  }
+
 }