Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/30 18:35:29 UTC

svn commit: r1498148 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/

Author: hashutosh
Date: Sun Jun 30 16:35:29 2013
New Revision: 1498148

URL: http://svn.apache.org/r1498148
Log:
HIVE-4692 : Constant agg parameters will be replaced by ExprNodeColumnDesc with single-sourced multi-gby cases (Navis via Ashutosh Chauhan)
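
Context: the failure mode is a single-sourced multi-group-by query, i.e. one FROM
subquery feeding several INSERT ... SELECT <aggregate> branches. A minimal
reproduction, taken from the multi_insert_gby2.q test added below (one branch
passes the constant 0.5 to percentile_approx):

    FROM (select key, cast(key as double) as value from src order by key) a
    INSERT OVERWRITE TABLE e1
        SELECT COUNT(*)
    INSERT OVERWRITE TABLE e2
        SELECT percentile_approx(value, 0.5);

Before this patch, planning the group-by branches of such a query could rewrite
the constant parameter into an ExprNodeColumnDesc pointing at a reduce value
column, corrupting the aggregations; compare the related HIVE-3699 referenced
in the test, where all destination tables received the same results.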

Added:
    hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q
    hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
    hive/trunk/ql/src/test/results/clientpositive/union31.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1498148&r1=1498147&r2=1498148&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sun Jun 30 16:35:29 2013
@@ -2902,11 +2902,11 @@ public class SemanticAnalyzer extends Ba
    */
   @SuppressWarnings("nls")
   private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
-      String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
+      String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode,
       Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
       throws SemanticException {
     RowResolver groupByInputRowResolver = opParseCtx
-        .get(reduceSinkOperatorInfo).getRowResolver();
+        .get(input).getRowResolver();
     RowResolver groupByOutputRowResolver = new RowResolver();
     groupByOutputRowResolver.setIsExprResolver(true);
     ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
@@ -2937,15 +2937,11 @@ public class SemanticAnalyzer extends Ba
     // get the last colName for the reduce KEY
     // it represents the column name corresponding to distinct aggr, if any
     String lastKeyColName = null;
-    List<ExprNodeDesc> reduceValues = null;
-    if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
-      List<String> inputKeyCols = ((ReduceSinkDesc)
-          reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
-      if (inputKeyCols.size() > 0) {
-        lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
-      }
-      reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
+    List<String> inputKeyCols = ((ReduceSinkDesc) rs.getConf()).getOutputKeyColumnNames();
+    if (inputKeyCols.size() > 0) {
+      lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
     }
+    List<ExprNodeDesc> reduceValues = ((ReduceSinkDesc) rs.getConf()).getValueCols();
     int numDistinctUDFs = 0;
     for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
       ASTNode value = entry.getValue();
@@ -3022,7 +3018,7 @@ public class SemanticAnalyzer extends Ba
         new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
             false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0),
         new RowSchema(groupByOutputRowResolver.getColumnInfos()),
-        reduceSinkOperatorInfo), groupByOutputRowResolver);
+        input), groupByOutputRowResolver);
     op.setColumnExprMap(colExprMap);
     return op;
   }
@@ -3490,7 +3486,7 @@ public class SemanticAnalyzer extends Ba
    * @throws SemanticException
    */
   @SuppressWarnings("nls")
-  private Operator genGroupByPlanReduceSinkOperator(QB qb,
+  private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb,
       String dest,
       Operator inputOperatorInfo,
       List<ASTNode> grpByExprs,
@@ -3680,7 +3676,7 @@ public class SemanticAnalyzer extends Ba
   }
 
   @SuppressWarnings("nls")
-  private Operator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests,
+  private ReduceSinkOperator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests,
       Operator inputOperatorInfo) throws SemanticException {
 
     RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
@@ -4027,7 +4023,7 @@ public class SemanticAnalyzer extends Ba
     }
 
     // ////// 1. Generate ReduceSinkOperator
-    Operator reduceSinkOperatorInfo =
+    ReduceSinkOperator reduceSinkOperatorInfo =
         genGroupByPlanReduceSinkOperator(qb,
             dest,
             input,
@@ -4040,7 +4036,7 @@ public class SemanticAnalyzer extends Ba
 
     // ////// 2. Generate GroupbyOperator
     Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
-        dest, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
+        dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
 
     return groupByOperatorInfo;
   }
@@ -4110,7 +4106,8 @@ public class SemanticAnalyzer extends Ba
     Operator select = insertSelectAllPlanForGroupBy(selectInput);
 
     // Generate ReduceSinkOperator
-    Operator reduceSinkOperatorInfo = genCommonGroupByPlanReduceSinkOperator(qb, dests, select);
+    ReduceSinkOperator reduceSinkOperatorInfo =
+        genCommonGroupByPlanReduceSinkOperator(qb, dests, select);
 
     // It is assumed throughout the code that a reducer has a single child, add a
     // ForwardOperator so that we can add multiple filter/group by operators as children
@@ -4130,7 +4127,7 @@ public class SemanticAnalyzer extends Ba
 
       // Generate GroupbyOperator
       Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
-          dest, curr, GroupByDesc.Mode.COMPLETE, null);
+          dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
 
       curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb);
     }
@@ -4272,7 +4269,7 @@ public class SemanticAnalyzer extends Ba
     // DISTINCT
     // operator. We set the numPartitionColumns to -1 for this purpose. This is
     // captured by WritableComparableHiveObject.hashCode() function.
-    Operator reduceSinkOperatorInfo =
+    ReduceSinkOperator reduceSinkOperatorInfo =
         genGroupByPlanReduceSinkOperator(qb,
             dest,
             input,
@@ -4287,7 +4284,7 @@ public class SemanticAnalyzer extends Ba
     Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
         new LinkedHashMap<String, GenericUDAFEvaluator>();
     GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator(
-        parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
+        parseInfo, dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
         genericUDAFEvaluators);
 
     int numReducers = -1;
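
Note on the shape of the fix: genGroupByPlanGroupByOperator previously probed
its single Operator argument with an instanceof ReduceSinkDesc check to recover
the reduce-side key and value columns. In the common multi-group-by path the
immediate input is the ForwardOperator inserted below the reducer, not the
ReduceSinkOperator itself, so that probe silently failed, reduceValues stayed
null, and aggregation parameters were re-resolved from the row resolver — which
is how constants came back as ExprNodeColumnDesc references. The method now
takes both the immediate input and the originating ReduceSinkOperator, making
the key/value lookup unconditional; genGroupByPlanReduceSinkOperator and
genCommonGroupByPlanReduceSinkOperator were narrowed to return
ReduceSinkOperator so the call sites can pass it through.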

Added: hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q?rev=1498148&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q Sun Jun 30 16:35:29 2013
@@ -0,0 +1,19 @@
+--HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
+create table e1 (count int);
+create table e2 (percentile double);
+
+explain
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+    SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+    SELECT percentile_approx(value, 0.5);
+
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+    SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+    SELECT percentile_approx(value, 0.5);
+
+select * from e1;
+select * from e2;
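
(The expected output for this script is checked in below as
multi_insert_gby2.q.out; in the ant-based build of this era a qfile test like
this would typically be re-run with something like
ant test -Dtestcase=TestCliDriver -Dqfile=multi_insert_gby2.q.)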

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out?rev=1498148&r1=1498147&r2=1498148&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out Sun Jun 30 16:35:29 2013
@@ -83,7 +83,7 @@ STAGE PLANS:
                 type: boolean
             Group By Operator
               aggregations:
-                    expr: count(DISTINCT KEY._col1:1._col0)
+                    expr: count(DISTINCT KEY._col1:0._col0)
                     expr: sum(KEY._col1:1._col0)
                     expr: sum(DISTINCT KEY._col1:1._col0)
                     expr: count(VALUE._col0)
@@ -120,7 +120,7 @@ STAGE PLANS:
                 type: boolean
             Group By Operator
               aggregations:
-                    expr: count(DISTINCT KEY._col1:1._col0)
+                    expr: count(DISTINCT KEY._col1:0._col0)
                     expr: sum(KEY._col1:1._col0)
                     expr: sum(DISTINCT KEY._col1:1._col0)
                     expr: count(VALUE._col0)
@@ -153,7 +153,7 @@ STAGE PLANS:
                       name: default.dest_g3
           Group By Operator
             aggregations:
-                  expr: count(DISTINCT KEY._col1:1._col0)
+                  expr: count(DISTINCT KEY._col1:0._col0)
                   expr: sum(KEY._col1:1._col0)
                   expr: sum(DISTINCT KEY._col1:1._col0)
                   expr: count(VALUE._col0)
@@ -445,7 +445,7 @@ STAGE PLANS:
                 type: boolean
             Group By Operator
               aggregations:
-                    expr: count(DISTINCT KEY._col1:1._col0)
+                    expr: count(DISTINCT KEY._col1:0._col0)
                     expr: sum(KEY._col1:1._col0)
                     expr: sum(DISTINCT KEY._col1:1._col0)
                     expr: count(VALUE._col0)
@@ -482,7 +482,7 @@ STAGE PLANS:
                 type: boolean
             Group By Operator
               aggregations:
-                    expr: count(DISTINCT KEY._col1:1._col0)
+                    expr: count(DISTINCT KEY._col1:0._col0)
                     expr: sum(KEY._col1:1._col0)
                     expr: sum(DISTINCT KEY._col1:1._col0)
                     expr: count(VALUE._col0)
@@ -515,7 +515,7 @@ STAGE PLANS:
                       name: default.dest_g3
           Group By Operator
             aggregations:
-                  expr: count(DISTINCT KEY._col1:1._col0)
+                  expr: count(DISTINCT KEY._col1:0._col0)
                   expr: sum(KEY._col1:1._col0)
                   expr: sum(DISTINCT KEY._col1:1._col0)
                   expr: count(VALUE._col0)

Added: hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out?rev=1498148&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out Sun Jun 30 16:35:29 2013
@@ -0,0 +1,198 @@
+PREHOOK: query: --HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
+create table e1 (count int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
+create table e1 (count int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@e1
+PREHOOK: query: create table e2 (percentile double)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table e2 (percentile double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@e2
+PREHOOK: query: explain
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+    SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+    SELECT percentile_approx(value, 0.5)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+    SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+    SELECT percentile_approx(value, 0.5)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION percentile_approx (TOK_TABLE_OR_COL value) 0.5)))))
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+  Stage-4 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-3
+  Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: UDFToDouble(key)
+                    type: double
+              outputColumnNames: _col0, _col1
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: string
+                sort order: +
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: double
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: double
+            outputColumnNames: _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order: 
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: double
+                    expr: 0.5
+                    type: double
+      Reduce Operator Tree:
+        Forward
+          Group By Operator
+            aggregations:
+                  expr: count()
+            bucketGroup: false
+            mode: complete
+            outputColumnNames: _col0
+            Select Operator
+              expressions:
+                    expr: UDFToInteger(_col0)
+                    type: int
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.e1
+          Group By Operator
+            aggregations:
+                  expr: percentile_approx(VALUE._col0, 0.5)
+            bucketGroup: false
+            mode: complete
+            outputColumnNames: _col0
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: double
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 2
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.e2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.e1
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.e2
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+
+PREHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+    SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+    SELECT percentile_approx(value, 0.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@e1
+PREHOOK: Output: default@e2
+POSTHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+    SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+    SELECT percentile_approx(value, 0.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@e1
+POSTHOOK: Output: default@e2
+POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: e2.percentile EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from e1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from e1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: e2.percentile EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+500
+PREHOOK: query: select * from e2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from e2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: e2.percentile EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+255.5

Modified: hive/trunk/ql/src/test/results/clientpositive/union31.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union31.q.out?rev=1498148&r1=1498147&r2=1498148&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union31.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union31.q.out Sun Jun 30 16:35:29 2013
@@ -965,7 +965,7 @@ STAGE PLANS:
         Forward
           Group By Operator
             aggregations:
-                  expr: count(VALUE._col0)
+                  expr: count(1)
             bucketGroup: false
             keys:
                   expr: KEY._col0
@@ -989,7 +989,7 @@ STAGE PLANS:
                     name: default.t7
           Group By Operator
             aggregations:
-                  expr: count(VALUE._col0)
+                  expr: count(1)
             bucketGroup: false
             keys:
                   expr: KEY._col0
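
The union31.q.out change above is the observable effect of the fix on an
existing plan: the reduce-side aggregation once again reads count(1),
preserving the constant parameter, where the previous (broken) plan had
substituted the column reference count(VALUE._col0). The corrected distinct-key
index in groupby_multi_single_reducer.q.out (KEY._col1:0._col0 instead of
KEY._col1:1._col0) likewise appears to follow from resolving keys against the
right ReduceSinkDesc.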