You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/10/18 02:02:12 UTC

svn commit: r1632698 - in /hive/branches/branch-0.14/ql/src: java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/ test/results/clientpositive/tez/

Author: sershe
Date: Sat Oct 18 00:02:11 2014
New Revision: 1632698

URL: http://svn.apache.org/r1632698
Log:
HIVE-8462 : CBO duplicates columns (with HIVE-8511 addendum) (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Oct 18 00:02:11 2014
@@ -2705,7 +2705,7 @@ public class SemanticAnalyzer extends Ba
 
   @SuppressWarnings("nls")
   private Integer genColListRegex(String colRegex, String tabAlias,
-      ASTNode sel, ArrayList<ExprNodeDesc> col_list,
+      ASTNode sel, ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols,
       RowResolver input, Integer pos, RowResolver output, List<String> aliases)
       throws SemanticException {
 
@@ -2747,6 +2747,9 @@ public class SemanticAnalyzer extends Ba
       // from the input schema
       for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
         ColumnInfo colInfo = entry.getValue();
+        if (excludeCols != null && excludeCols.contains(colInfo)) {
+          continue; // This was added during plan generation.
+        }
         String name = colInfo.getInternalName();
         String[] tmp = input.reverseLookup(name);
 
@@ -3426,7 +3429,7 @@ public class SemanticAnalyzer extends Ba
       }
       if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
         genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
-            col_list, inputRR, pos, out_rwsch, qb.getAliases());
+            col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
       }
     }
 
@@ -3548,7 +3551,7 @@ public class SemanticAnalyzer extends Ba
       if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
         pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
             : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
-            expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
+            expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
         selectStar = true;
       } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
           && !inputRR.getIsExprResolver()
@@ -3557,7 +3560,7 @@ public class SemanticAnalyzer extends Ba
         // This can only happen without AS clause
         // We don't allow this for ExprResolver - the Group By case
         pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
-            null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
+            null, expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
       } else if (expr.getType() == HiveParser.DOT
           && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
           && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
@@ -3568,9 +3571,8 @@ public class SemanticAnalyzer extends Ba
         // This can only happen without AS clause
         // We don't allow this for ExprResolver - the Group By case
         pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
-            unescapeIdentifier(expr.getChild(0).getChild(0).getText()
-                .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
-            qb.getAliases());
+            unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
+             expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases());
       } else {
         // Case when this is an expression
         TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
@@ -13657,50 +13659,46 @@ public class SemanticAnalyzer extends Ba
       return new Pair(w, wHiveRetType);
     }
 
-    private RelNode genSelectForWindowing(QB qb, RelNode srcRel) throws SemanticException {
-      RelNode selOpForWindow = null;
+    private RelNode genSelectForWindowing(
+        QB qb, RelNode srcRel, HashSet<ColumnInfo> newColumns) throws SemanticException {
       QBParseInfo qbp = getQBParseInfo(qb);
       WindowingSpec wSpec = (!qb.getAllWindowingSpecs().isEmpty()) ? qb.getAllWindowingSpecs()
           .values().iterator().next() : null;
+      if (wSpec == null) return null;
+      // 1. Get valid Window Function Spec
+      wSpec.validateAndMakeEffective();
+      List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
+      if (windowExpressions == null || windowExpressions.isEmpty()) return null;
 
-      if (wSpec != null) {
-        // 1. Get valid Window Function Spec
-        wSpec.validateAndMakeEffective();
-        List<WindowExpressionSpec> windowExpressions = wSpec.getWindowExpressions();
-
-        if (windowExpressions != null && !windowExpressions.isEmpty()) {
-          RowResolver inputRR = this.relToHiveRR.get(srcRel);
-          // 2. Get RexNodes for original Projections from below
-          List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
-              HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
-
-          // 3. Construct new Row Resolver with everything from below.
-          RowResolver out_rwsch = new RowResolver();
-          RowResolver.add(out_rwsch, inputRR, 0);
-
-          // 4. Walk through Window Expressions & Construct RexNodes for those,
-          // Update out_rwsch
-          for (WindowExpressionSpec wExprSpec : windowExpressions) {
-            if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
-              Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
-              projsForWindowSelOp.add(wtp.getKey());
-
-              // 6.2.2 Update Output Row Schema
-              ColumnInfo oColInfo = new ColumnInfo(
-                  getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
-              if (false) {
-                out_rwsch.put(null, wExprSpec.getAlias(), oColInfo);
-              } else {
-                out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
-              }
-            }
-          }
+      RowResolver inputRR = this.relToHiveRR.get(srcRel);
+      // 2. Get RexNodes for original Projections from below
+      List<RexNode> projsForWindowSelOp = new ArrayList<RexNode>(
+          HiveOptiqUtil.getProjsFromBelowAsInputRef(srcRel));
 
-          selOpForWindow = genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
+      // 3. Construct new Row Resolver with everything from below.
+      RowResolver out_rwsch = new RowResolver();
+      RowResolver.add(out_rwsch, inputRR, 0);
+
+      // 4. Walk through Window Expressions & Construct RexNodes for those,
+      // Update out_rwsch
+      for (WindowExpressionSpec wExprSpec : windowExpressions) {
+        if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
+          Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
+          projsForWindowSelOp.add(wtp.getKey());
+
+          // 6.2.2 Update Output Row Schema
+          ColumnInfo oColInfo = new ColumnInfo(
+              getColumnInternalName(projsForWindowSelOp.size()), wtp.getValue(), null, false);
+          if (false) {
+            out_rwsch.put(null, wExprSpec.getAlias(), oColInfo);
+          } else {
+            out_rwsch.putExpression(wExprSpec.getExpression(), oColInfo);
+          }
+          newColumns.add(oColInfo);
         }
       }
 
-      return selOpForWindow;
+      return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel);
     }
 
     private RelNode genSelectRelNode(List<RexNode> optiqColLst, RowResolver out_rwsch,
@@ -13789,9 +13787,10 @@ public class SemanticAnalyzer extends Ba
      * @throws SemanticException
      */
     private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
-
       // 0. Generate a Select Node for Windowing
-      RelNode selForWindow = genSelectForWindowing(qb, srcRel);
+      //    Exclude the newly-generated select columns from */etc. resolution.
+      HashSet<ColumnInfo> excludedColumns = new HashSet<ColumnInfo>();
+      RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns);
       srcRel = (selForWindow == null) ? srcRel : selForWindow;
 
       boolean subQuery;
@@ -13882,7 +13881,8 @@ public class SemanticAnalyzer extends Ba
         if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
           pos = genColListRegex(".*",
               expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0))
-                  .toLowerCase(), expr, col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
+                  .toLowerCase(), expr, col_list, excludedColumns, inputRR, pos, out_rwsch,
+                  tabAliasesForAllProjs);
           selectStar = true;
         } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
             && !inputRR.getIsExprResolver()
@@ -13891,7 +13891,7 @@ public class SemanticAnalyzer extends Ba
           // This can only happen without AS clause
           // We don't allow this for ExprResolver - the Group By case
           pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr,
-              col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
+              col_list, excludedColumns, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
         } else if (expr.getType() == HiveParser.DOT
             && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
             && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText()
@@ -13902,7 +13902,7 @@ public class SemanticAnalyzer extends Ba
           // We don't allow this for ExprResolver - the Group By case
           pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
               unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr,
-              col_list, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
+              col_list, excludedColumns, inputRR, pos, out_rwsch, tabAliasesForAllProjs);
         } else if (expr.toStringTree().contains("TOK_FUNCTIONDI") && !(srcRel instanceof HiveAggregateRel)) {
           // Likely a malformed query eg, select hash(distinct c1) from t1;
           throw new OptiqSemanticException("Distinct without an aggreggation.");

Modified: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/cbo_correctness.q Sat Oct 18 00:02:11 2014
@@ -482,3 +482,7 @@ select unionsrc.key, count(1) FROM (sele
     UNION ALL
         select 'avg' as key,  avg(c_int) as value from t3 s3) unionsrc group by unionsrc.key order by unionsrc.key;
 
+-- Windowing
+select *, rank() over(partition by key order by value) as rr from src1;
+
+select *, rank() over(partition by key order by value) from src1;
\ No newline at end of file

Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/cbo_correctness.q.out Sat Oct 18 00:02:11 2014
@@ -19037,3 +19037,71 @@ POSTHOOK: Input: default@t3
 avg	1
 max	1
 min	1
+PREHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+		1
+		1
+		1
+		1
+	val_165	5
+	val_193	6
+	val_265	7
+	val_27	8
+	val_409	9
+	val_484	10
+128		1
+146	val_146	1
+150	val_150	1
+213	val_213	1
+224		1
+238	val_238	1
+255	val_255	1
+273	val_273	1
+278	val_278	1
+311	val_311	1
+369		1
+401	val_401	1
+406	val_406	1
+66	val_66	1
+98	val_98	1
+PREHOOK: query: select *, rank() over(partition by key order by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select *, rank() over(partition by key order by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+		1
+		1
+		1
+		1
+	val_165	5
+	val_193	6
+	val_265	7
+	val_27	8
+	val_409	9
+	val_484	10
+128		1
+146	val_146	1
+150	val_150	1
+213	val_213	1
+224		1
+238	val_238	1
+255	val_255	1
+273	val_273	1
+278	val_278	1
+311	val_311	1
+369		1
+401	val_401	1
+406	val_406	1
+66	val_66	1
+98	val_98	1

Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out?rev=1632698&r1=1632697&r2=1632698&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/cbo_correctness.q.out Sat Oct 18 00:02:11 2014
@@ -19037,3 +19037,71 @@ POSTHOOK: Input: default@t3
 avg	1
 max	1
 min	1
+PREHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Windowing
+select *, rank() over(partition by key order by value) as rr from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+		1
+		1
+		1
+		1
+	val_165	5
+	val_193	6
+	val_265	7
+	val_27	8
+	val_409	9
+	val_484	10
+128		1
+146	val_146	1
+150	val_150	1
+213	val_213	1
+224		1
+238	val_238	1
+255	val_255	1
+273	val_273	1
+278	val_278	1
+311	val_311	1
+369		1
+401	val_401	1
+406	val_406	1
+66	val_66	1
+98	val_98	1
+PREHOOK: query: select *, rank() over(partition by key order by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select *, rank() over(partition by key order by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+		1
+		1
+		1
+		1
+	val_165	5
+	val_193	6
+	val_265	7
+	val_27	8
+	val_409	9
+	val_484	10
+128		1
+146	val_146	1
+150	val_150	1
+213	val_213	1
+224		1
+238	val_238	1
+255	val_255	1
+273	val_273	1
+278	val_278	1
+311	val_311	1
+369		1
+401	val_401	1
+406	val_406	1
+66	val_66	1
+98	val_98	1