You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2015/01/16 00:22:26 UTC
drill git commit: DRILL-1889: Fix star column prefix and subsume
logic, so that query planner handle single table * query properly.
Repository: drill
Updated Branches:
refs/heads/master 937802814 -> 5a280666e
DRILL-1889: Fix star column prefix and subsume logic, so that query planner handle single table * query properly.
Code revision based on review comment.
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/5a280666
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/5a280666
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/5a280666
Branch: refs/heads/master
Commit: 5a280666ecdb236b52de73f57a504bb521e3ea89
Parents: 9378028
Author: Jinfeng Ni <jn...@maprtech.com>
Authored: Mon Jan 12 21:16:47 2015 -0800
Committer: Jinfeng Ni <jn...@maprtech.com>
Committed: Thu Jan 15 15:19:58 2015 -0800
----------------------------------------------------------------------
.../drill/exec/planner/StarColumnHelper.java | 25 +++++--
.../planner/common/DrillProjectRelBase.java | 27 +++----
.../physical/visitor/StarColumnConverter.java | 75 ++++++++++++--------
.../java/org/apache/drill/TestStarQueries.java | 8 +++
4 files changed, 86 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
index 9beef39..b9dfda3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
@@ -19,6 +19,7 @@
package org.apache.drill.exec.planner;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.eigenbase.reltype.RelDataType;
@@ -32,6 +33,10 @@ public class StarColumnHelper {
public final static String PREFIXED_STAR_COLUMN = PREFIX_DELIMITER + STAR_COLUMN;
public static boolean containsStarColumn(RelDataType type) {
+ if (! type.isStruct()) {
+ return false;
+ }
+
List<String> fieldNames = type.getFieldNames();
for (String s : fieldNames) {
@@ -55,6 +60,14 @@ public class StarColumnHelper {
return isPrefixedStarColumn(fieldName) || isNonPrefixedStarColumn(fieldName);
}
+ // Expression in some sense is similar to regular columns. Expression (i.e. C1 + C2 + 10) is not
+ // associated with an alias, the project will have (C1 + C2 + 10) --> f1, column "f1" could be
+ // viewed as a regular column, and does not require prefix. If user put an alias, then,
+ // the project will have (C1 + C2 + 10) -> alias.
+ public static boolean isRegularColumnOrExp(String fieldName) {
+ return ! isStarColumn(fieldName);
+ }
+
public static String extractStarColumnPrefix(String fieldName) {
assert (isPrefixedStarColumn(fieldName));
@@ -71,12 +84,14 @@ public class StarColumnHelper {
}
// Given a set of prefixes, check if a regular column is subsumed by any of the prefixed star column in the set.
- public static boolean subsumeRegColumn(Set<String> prefixes, String fieldName) {
- if (isPrefixedStarColumn(fieldName)) {
- return false; // only applies to regular column.
- }
+ public static boolean subsumeColumn(Map<String, String> prefixMap, String fieldName) {
+ String prefix = extractColumnPrefix(fieldName);
- return prefixes.contains(extractColumnPrefix(fieldName));
+ if (isRegularColumnOrExp(fieldName)) {
+ return false; // regular column or expression is not subsumed by any star column.
+ } else {
+ return prefixMap.containsKey(prefix) && ! fieldName.equals(prefixMap.get(prefix)); // t1*0 is subsumed by t1*.
+ }
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
index f440c29..7cf98cd 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.planner.common;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -71,27 +72,27 @@ public abstract class DrillProjectRelBase extends ProjectRelBase implements Dril
return Pair.zip(exps, getRowType().getFieldNames());
}
- // By default, the project will not allow duplicate columns, caused by expanding from * column.
- // For example, if we have T1_*, T1_Col1, T1_Col2, Col1 and Col2 will have two copies if we expand
- // * into a list of regular columns. For the intermediate project, the duplicate columns are not
- // necessary; it will impact performance.
protected List<NamedExpression> getProjectExpressions(DrillParseContext context) {
List<NamedExpression> expressions = Lists.newArrayList();
- HashSet<String> starColPrefixes = new HashSet<String>();
+ HashMap<String, String> starColPrefixes = new HashMap<String, String>();
- // To remove duplicate columns caused by expanding from * column, we'll keep track of
- // all the prefix in the project expressions. If a regular column C1 have the same prefix, that
- // regular column is not included in the project expression, since at execution time, * will be
- // expanded into a list of column, including column C1.
- for (String fieldName : getRowType().getFieldNames()) {
- if (StarColumnHelper.isPrefixedStarColumn(fieldName)) {
- starColPrefixes.add(StarColumnHelper.extractStarColumnPrefix(fieldName));
+ // T1.* will subsume T1.*0, but will not subsume any regular column/expression.
+ // Select *, col1, *, col2 : the intermediate will output one set of regular columns expanded from star with prefix,
+ // plus col1 and col2 without prefix.
+ // This will allow us to differentiate the regular expanded from *, and the regular column referenced in the query.
+ for (Pair<RexNode, String> pair : projects()) {
+ if (StarColumnHelper.isPrefixedStarColumn(pair.right)) {
+ String prefix = StarColumnHelper.extractStarColumnPrefix(pair.right);
+
+ if (! starColPrefixes.containsKey(prefix)) {
+ starColPrefixes.put(prefix, pair.right);
+ }
}
}
for (Pair<RexNode, String> pair : projects()) {
- if (! StarColumnHelper.subsumeRegColumn(starColPrefixes, pair.right)) {
+ if (! StarColumnHelper.subsumeColumn(starColPrefixes, pair.right)) {
LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left);
expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right)));
}
http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
index 159099d..1511e49 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
@@ -22,6 +22,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
+import com.google.common.base.Preconditions;
import org.apache.drill.exec.planner.StarColumnHelper;
import org.apache.drill.exec.planner.physical.JoinPrel;
import org.apache.drill.exec.planner.physical.Prel;
@@ -45,18 +46,24 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
private static final AtomicLong tableNumber = new AtomicLong(0);
public static Prel insertRenameProject(Prel root, RelDataType origRowType) {
- // Insert top project to do rename only when : 1) there is a join
- // 2) there is a SCAN with * column. We pass two boolean to keep track of
- // these two conditions.
- boolean [] renamedForStar = new boolean [2];
- renamedForStar[0] = false;
- renamedForStar[1] = false;
+ // Prefixing columns for columns expanded from star column :
+ // Insert one project under screen (PUS) to remove prefix, and one project above scan (PAS) to add prefix.
+ // PUS AND PAS are required, when
+ // Any non-SCAN prel produces regular column / expression AND star column,
+ // or multiple star columns.
+ // This is because we have to use prefix to distinguish columns expanded
+ // from star column, from those regular column referenced in the query.
+
+ // We use an array of boolean to keep track this condition.
+ boolean [] prefixedForStar = new boolean [1];
+ prefixedForStar[0] = false;
//root should be screen / writer : no need to rename for the root.
+ Prel child = ((Prel) root.getInput(0)).accept(INSTANCE, prefixedForStar);
- Prel child = ((Prel) root.getInput(0)).accept(INSTANCE, renamedForStar);
+ int fieldCount = root.getRowType().isStruct()? root.getRowType().getFieldCount():1;
- if (renamedForStar[0] && renamedForStar[1]) {
+ if (prefixedForStar[0]) {
List<RexNode> exprs = Lists.newArrayList();
for (int i = 0; i < origRowType.getFieldCount(); i++) {
RexNode expr = child.getCluster().getRexBuilder().makeInputRef(origRowType.getFieldList().get(i).getType(), i);
@@ -65,25 +72,33 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
RelDataType newRowType = RexUtil.createStructType(child.getCluster().getTypeFactory(), exprs, origRowType.getFieldNames());
- // Insert a top project which allows duplicate columns.
- child = new ProjectAllowDupPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+ // Insert PUS : remove the prefix and keep the orignal field name.
+ if (fieldCount > 1) { // // no point in allowing duplicates if we only have one column
+ child = new ProjectAllowDupPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+ } else {
+ child = new ProjectPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+ }
List<RelNode> children = Lists.newArrayList();
children.add( child);
return (Prel) root.copy(root.getTraitSet(), children);
- }else{
+ } else {
return root;
}
}
-
@Override
- public Prel visitPrel(Prel prel, boolean [] renamedForStar) throws RuntimeException {
+ public Prel visitPrel(Prel prel, boolean [] prefixedForStar) throws RuntimeException {
+ // there exists other expression, in addition to a star column: require prefixRename.
+ if (StarColumnHelper.containsStarColumn(prel.getRowType()) && prel.getRowType().getFieldNames().size() > 1) {
+ prefixedForStar[0] = true;
+ }
+
List<RelNode> children = Lists.newArrayList();
for (Prel child : prel) {
- child = child.accept(this, renamedForStar);
+ child = child.accept(this, prefixedForStar);
children.add(child);
}
@@ -91,11 +106,14 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
// when the project expression is RexInPutRef. This is necessary since Optiq may use
// an arbitrary name for the project's field name.
if (prel instanceof ProjectPrel) {
+
+ ProjectPrel proj = (ProjectPrel) prel;
+
RelNode child = children.get(0);
List<String> fieldNames = Lists.newArrayList();
- for (Pair<String, RexNode> pair : Pair.zip(prel.getRowType().getFieldNames(), ((ProjectPrel) prel).getProjects())) {
+ for (Pair<String, RexNode> pair : Pair.zip(prel.getRowType().getFieldNames(), proj.getProjects())) {
if (pair.right instanceof RexInputRef) {
String name = child.getRowType().getFieldNames().get(((RexInputRef) pair.right).getIndex());
fieldNames.add(name);
@@ -107,27 +125,17 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
// Make sure the field names are unique : Optiq does not allow duplicate field names in a rowType.
fieldNames = makeUniqueNames(fieldNames);
- RelDataType rowType = RexUtil.createStructType(prel.getCluster().getTypeFactory(), ((ProjectPrel) prel).getProjects(), fieldNames);
+ RelDataType rowType = RexUtil.createStructType(prel.getCluster().getTypeFactory(), proj.getProjects(), fieldNames);
- return (Prel) new ProjectPrel(prel.getCluster(), prel.getTraitSet(), children.get(0), ((ProjectPrel) prel).getProjects(), rowType);
+ return (Prel) proj.copy(proj.getTraitSet(),children.get(0), proj.getProjects(), rowType);
} else {
return (Prel) prel.copy(prel.getTraitSet(), children);
}
}
-
@Override
- public Prel visitJoin(JoinPrel prel, boolean [] renamedForStar) throws RuntimeException {
- renamedForStar[0] = true; // indicate there is a join, which may require top rename projet operator.
- return visitPrel(prel, renamedForStar);
- }
-
-
- @Override
- public Prel visitScan(ScanPrel scanPrel, boolean [] renamedForStar) throws RuntimeException {
- if (StarColumnHelper.containsStarColumn(scanPrel.getRowType()) && renamedForStar[0] ) {
-
- renamedForStar[1] = true; // indicate there is * for a SCAN operator.
+ public Prel visitScan(ScanPrel scanPrel, boolean [] prefixedForStar) throws RuntimeException {
+ if (StarColumnHelper.containsStarColumn(scanPrel.getRowType()) && prefixedForStar[0] ) {
List<RexNode> exprs = Lists.newArrayList();
@@ -141,15 +149,20 @@ public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], Runtim
long tableId = tableNumber.getAndIncrement();
for (String name : scanPrel.getRowType().getFieldNames()) {
- fieldNames.add("T" + tableId + StarColumnHelper.PREFIX_DELIMITER + name);
+ if (StarColumnHelper.isNonPrefixedStarColumn(name)) {
+ fieldNames.add("T" + tableId + StarColumnHelper.PREFIX_DELIMITER + name); // Add prefix to * column.
+ } else {
+ fieldNames.add(name); // Keep regular column as it is.
+ }
}
RelDataType rowType = RexUtil.createStructType(scanPrel.getCluster().getTypeFactory(), exprs, fieldNames);
+ // insert a PAS.
ProjectPrel proj = new ProjectPrel(scanPrel.getCluster(), scanPrel.getTraitSet(), scanPrel, exprs, rowType);
return proj;
} else {
- return visitPrel(scanPrel, renamedForStar);
+ return visitPrel(scanPrel, prefixedForStar);
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/5a280666/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
index 9ce2d8c..ee55c5c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestStarQueries.java
@@ -219,4 +219,12 @@ public class TestStarQueries extends BaseTestQuery{
"order by cnt;" );
}
+ @Test // DRILL-1889
+ public void testStarWithOtherExpression() throws Exception {
+ test("select * from cp.`tpch/nation.parquet` order by substr(n_name, 2, 5) limit 3");
+ test("select *, n_nationkey + 5 from cp.`tpch/nation.parquet` limit 3");
+ test("select * from cp.`tpch/nation.parquet` where n_nationkey + 5 > 10 limit 3");
+ test("select * from cp.`tpch/nation.parquet` order by random()");
+ }
+
}