Posted to commits@hive.apache.org by na...@apache.org on 2012/01/26 12:54:08 UTC

svn commit: r1236150 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java test/queries/clientpositive/groupby_multi_single_reducer2.q test/results/clientpositive/groupby_multi_single_reducer2.q.out

Author: namit
Date: Thu Jan 26 11:54:07 2012
New Revision: 1236150

URL: http://svn.apache.org/viewvc?rev=1236150&view=rev
Log:
HIVE-2750 Hive multi group by single reducer optimization causes invalid column
reference error (Kevin Wilfong via namit)


Added:
    hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q
    hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1236150&r1=1236149&r2=1236150&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Jan 26 11:54:07 2012
@@ -3081,7 +3081,7 @@ public class SemanticAnalyzer extends Ba
     // them
     for (String destination : dests) {
 
-      getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver,
+      getReduceValuesForReduceSinkNoMapAgg(parseInfo, destination, reduceSinkInputRowResolver,
           reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues);
 
       // Need to pass all of the columns used in the where clauses as reduce values
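
The one-line change above is the whole fix for HIVE-2750: inside the loop over all insert destinations, the helper was being called with a `dest` variable from an enclosing scope rather than the loop variable `destination`, so the reduce values were built against the wrong destination and a multi-insert group by could fail with an invalid column reference error. A minimal, self-contained sketch of that pattern is below; the names and the simplified signature are hypothetical (the real getReduceValuesForReduceSinkNoMapAgg in SemanticAnalyzer also takes the reduce sink row resolvers, output value column names, and reduce values).

    // Sketch only: illustrates the loop-variable bug pattern corrected above.
    import java.util.Arrays;
    import java.util.List;

    public class MultiGroupByDestinationSketch {

      // Stand-in for the real helper: collects the reduce values for one
      // insert destination of a multi-insert query.
      static void getReduceValuesForReduceSinkNoMapAgg(String dest) {
        System.out.println("collecting reduce values for destination " + dest);
      }

      public static void main(String[] args) {
        // Placeholder destination names for the two INSERT clauses.
        List<String> dests = Arrays.asList("dest_g2", "dest_g3");
        for (String destination : dests) {
          // Before the fix, the call passed a stale `dest` variable from an
          // enclosing scope instead of the loop variable, so every iteration
          // resolved columns against the same destination; queries like the
          // new groupby_multi_single_reducer2.q then hit an invalid column
          // reference error. The fix is to pass the loop variable.
          getReduceValuesForReduceSinkNoMapAgg(destination);
        }
      }
    }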

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q?rev=1236150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q Thu Jan 26 11:54:07 2012
@@ -0,0 +1,19 @@
+set hive.multigroupby.singlereducer=true;
+
+CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE;
+CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1);
+
+SELECT * FROM dest_g2;
+SELECT * FROM dest_g3;
+
+DROP TABLE dest_g2;
+DROP TABLE dest_g3;

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out?rev=1236150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out Thu Jan 26 11:54:07 2012
@@ -0,0 +1,250 @@
+PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g3
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: key, value
+                Reduce Output Operator
+                  key expressions:
+                        expr: substr(key, 1, 1)
+                        type: string
+                        expr: key
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: substr(key, 1, 1)
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: value
+                        type: string
+      Reduce Operator Tree:
+        Forward
+          Group By Operator
+            aggregations:
+                  expr: count(DISTINCT KEY._col1:0._col0)
+            bucketGroup: false
+            keys:
+                  expr: KEY._col0
+                  type: string
+            mode: complete
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToInteger(_col1)
+                      type: int
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.dest_g2
+          Group By Operator
+            aggregations:
+                  expr: count(DISTINCT KEY._col1:0._col0)
+                  expr: count(VALUE._col0)
+            bucketGroup: false
+            keys:
+                  expr: KEY._col0
+                  type: string
+            mode: complete
+            outputColumnNames: _col0, _col1, _col2
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: bigint
+              outputColumnNames: _col0, _col1, _col2
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToInteger(_col1)
+                      type: int
+                      expr: UDFToInteger(_col2)
+                      type: int
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 2
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.dest_g3
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g2
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g3
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+PREHOOK: Output: default@dest_g3
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT * FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	1
+1	71
+2	69
+3	62
+4	74
+5	6
+6	5
+7	6
+8	8
+9	7
+PREHOOK: query: SELECT * FROM dest_g3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest_g3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g3
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	1	3
+1	71	115
+2	69	111
+3	62	99
+4	74	124
+5	6	10
+6	5	6
+7	6	10
+8	8	10
+9	7	12
+PREHOOK: query: DROP TABLE dest_g2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g2
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: DROP TABLE dest_g2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g2
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE dest_g3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g3
+PREHOOK: Output: default@dest_g3
+POSTHOOK: query: DROP TABLE dest_g3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g3
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]