You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/10/18 02:02:12 UTC
svn commit: r1632698 - in /hive/branches/branch-0.14/ql/src:
java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/
test/results/clientpositive/ test/results/clientpositive/tez/
Author: sershe
Date: Sat Oct 18 00:02:11 2014
New Revision: 1632698
URL: http://svn.apache.org/r1632698
Log:
HIVE-8462 : CBO duplicates columns (with HIVE-8511 addendum) (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Modified:
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q
hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out
hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Oct 18 00:02:11 2014
@@ -2705,7 +2705,7 @@ public class SemanticAnalyzer extends Ba
@SuppressWarnings("nls")
private Integer genColListRegex(String colRegex, String tabAlias,
- ASTNode sel, ArrayList<ExprNodeDesc> col_list,
+ ASTNode sel, ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols,
RowResolver input, Integer pos, RowResolver output, List<String> aliases)
throws SemanticException {
@@ -2747,6 +2747,9 @@ public class SemanticAnalyzer extends Ba
// from the input schema
for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
+ if (excludeCols != null && excludeCols.contains(colInfo)) {
+ continue; // This was added during plan generation.
+ }
String name = colInfo.getInternalName();
String[] tmp = input.reverseLookup(name);
@@ -3426,7 +3429,7 @@ public class SemanticAnalyzer extends Ba
}
if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
- col_list, inputRR, pos, out_rwsch, qb.getAliases());
+ col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
}
}
@@ -3548,7 +3551,7 @@ public class SemanticAnalyzer extends Ba
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
- expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
+ expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
@@ -3557,7 +3560,7 @@ public class SemanticAnalyzer extends Ba
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
- null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
+ null, expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
@@ -3568,9 +3571,8 @@ public class SemanticAnalyzer extends Ba
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
- unescapeIdentifier(expr.getChild(0).getChild(0).getText()
- .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
- qb.getAliases());
+ unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
+ expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
@@ -13657,50 +13659,46 @@ public class SemanticAnalyzer extends Ba
return new Pair(w, wHiveRetType);
}
- private RelNode genSelectForWindowing(QB qb, RelNode srcRel) throws SemanticException {
- RelNode selOpForWindow = null;
+ private RelNode genSelectForWindowing(
+ QB qb, RelNode srcRel, HashSet<ColumnInfo> newColumns) throws SemanticException {
QBParseInfo qbp = getQBParseInfo(qb);
WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
.values().iterator().next() : null;
+ if (wSpec == null) return null;
+ // 1. Get valid Window Function Spec
+ wSpec.validateAndMakeEffective();
+ List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
+ if (windowExpressions == null || windowExpressions.isEmpty()) return null;
- if (wSpec != null) {
- // 1. Get valid Window Function Spec
- wSpec.validateAndMakeEffective();
- List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
-
- if (windowExpressions != null && !windowExpressions.isEmpty()) {
- RowResolver inputRR = this.relToHiveRR.get(srcRel);
- // 2. Get RexNodes for original Projections from below
- List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
- HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
-
- // 3. Construct new Row Resolver with everything from below.
- RowResolver out_rwsch = new RowResolver();
- RowResolver.add(out_rwsch, inputRR, 0);
-
- // 4. Walk through Window Expressions & Construct RexNodes for those,
- // Update out_rwsch
- for (WindowExpressionSpec wExprSpec : windowExpressions) {
- if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
- Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
- projsForWindowSelOp.add(wtp.getKey());
-
- // 6.2.2 Update Output Row Schema
- ColumnInfo oColInfo = new ColumnInfo(
- getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
- if (false) {
- out_rwsch.put(null, wExprSpec.getAlias(), oColInfo);
- } else {
- out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
- }
- }
- }
+ RowResolver inputRR = this.relToHiveRR.get(srcRel);
+ // 2. Get RexNodes for original Projections from below
+ List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
+ HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
- selOpForWindow = genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
+ // 3. Construct new Row Resolver with everything from below.
+ RowResolver out_rwsch = new RowResolver();
+ RowResolver.add(out_rwsch, inputRR, 0);
+
+ // 4. Walk through Window Expressions & Construct RexNodes for those,
+ // Update out_rwsch
+ for (WindowExpressionSpec wExprSpec : windowExpressions) {
+ if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
+ Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
+ projsForWindowSelOp.add(wtp.getKey());
+
+ // 6.2.2 Update Output Row Schema
+ ColumnInfo oColInfo = new ColumnInfo(
+ getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
+ if (false) {
+ out_rwsch.put(null, wExprSpec.getAlias(), oColInfo);
+ } else {
+ out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
+ }
+ newColumns.add(oColInfo);
}
}
- return selOpForWindow;
+ return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
}
private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch,
@@ -13789,9 +13787,10 @@ public class SemanticAnalyzer extends Ba
* @throws SemanticException
*/
private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
-
// 0. Generate a Select Node for Windowing
- RelNode selForWindow = genSelectForWindowing(qb, srcRel);
+ // Exclude the newly-generated select columns from */etc. resolution.
+ HashSet<ColumnInfo> excludedColumns = new HashSet<ColumnInfo>();
+ RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns);
srcRel = (selForWindow == null) ? srcRel : selForWindow;
boolean subQuery;
@@ -13882,7 +13881,8 @@ public class SemanticAnalyzer extends Ba
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*",
expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0))
- .toLowerCase(), expr, col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
+ .toLowerCase(), expr, col_list, excludedColumns, inputRR, pos, out_rwsch,
+ tabAliasesForAllProjs);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
@@ -13891,7 +13891,7 @@ public class SemanticAnalyzer extends Ba
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr,
- col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
+ col_list, excludedColumns, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
@@ -13902,7 +13902,7 @@ public class SemanticAnalyzer extends Ba
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr,
- col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
+ col_list, excludedColumns, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
} else if (expr.toStringTree().contains("TOK_FUNCTIONDI") && !(srcRel instanceof HiveAggregateRel)) {
// Likely a malformed query eg, select hash(distinct c1) from t1;
throw new OptiqSemanticException("Distinct without an aggreggation.");
Modified: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q Sat Oct 18 00:02:11 2014
@@ -482,3 +482,7 @@ select unionsrc.key, count(1) FROM (sele
UNION ALL
select 'avg' as key, avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key;
+-- Windowing
+select *, rank() over(partition by key order by value) as rr from src1;
+
+select *, rank() over(partition by key order by value) from src1;
\ No newline at end of file
Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out Sat Oct 18 00:02:11 2014
@@ -19037,3 +19037,71 @@ POSTHOOK: Input: default@t3
avg 1
max 1
min 1
+PREHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 1
+ 1
+ 1
+ 1
+ val_165 5
+ val_193 6
+ val_265 7
+ val_27 8
+ val_409 9
+ val_484 10
+128 1
+146 val_146 1
+150 val_150 1
+213 val_213 1
+224 1
+238 val_238 1
+255 val_255 1
+273 val_273 1
+278 val_278 1
+311 val_311 1
+369 1
+401 val_401 1
+406 val_406 1
+66 val_66 1
+98 val_98 1
+PREHOOK: query: select *, rank() over(partition by key order by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select *, rank() over(partition by key order by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 1
+ 1
+ 1
+ 1
+ val_165 5
+ val_193 6
+ val_265 7
+ val_27 8
+ val_409 9
+ val_484 10
+128 1
+146 val_146 1
+150 val_150 1
+213 val_213 1
+224 1
+238 val_238 1
+255 val_255 1
+273 val_273 1
+278 val_278 1
+311 val_311 1
+369 1
+401 val_401 1
+406 val_406 1
+66 val_66 1
+98 val_98 1
Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out Sat Oct 18 00:02:11 2014
@@ -19037,3 +19037,71 @@ POSTHOOK: Input: default@t3
avg 1
max 1
min 1
+PREHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 1
+ 1
+ 1
+ 1
+ val_165 5
+ val_193 6
+ val_265 7
+ val_27 8
+ val_409 9
+ val_484 10
+128 1
+146 val_146 1
+150 val_150 1
+213 val_213 1
+224 1
+238 val_238 1
+255 val_255 1
+273 val_273 1
+278 val_278 1
+311 val_311 1
+369 1
+401 val_401 1
+406 val_406 1
+66 val_66 1
+98 val_98 1
+PREHOOK: query: select *, rank() over(partition by key order by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select *, rank() over(partition by key order by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 1
+ 1
+ 1
+ 1
+ val_165 5
+ val_193 6
+ val_265 7
+ val_27 8
+ val_409 9
+ val_484 10
+128 1
+146 val_146 1
+150 val_150 1
+213 val_213 1
+224 1
+238 val_238 1
+255 val_255 1
+273 val_273 1
+278 val_278 1
+311 val_311 1
+369 1
+401 val_401 1
+406 val_406 1
+66 val_66 1
+98 val_98 1