You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/01/26 12:54:08 UTC
svn commit: r1236150 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
test/queries/clientpositive/groupby_multi_single_reducer2.q
test/results/clientpositive/groupby_multi_single_reducer2.q.out
Author: namit
Date: Thu Jan 26 11:54:07 2012
New Revision: 1236150
URL: http://svn.apache.org/viewvc?rev=1236150&view=rev
Log:
HIVE-2750 Hive multi-group-by single-reducer optimization causes invalid column
reference error (Kevin Wilfong via namit)
Added:
hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q
hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1236150&r1=1236149&r2=1236150&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Jan 26 11:54:07 2012
@@ -3081,7 +3081,7 @@ public class SemanticAnalyzer extends Ba
// them
for (String destination : dests) {
- getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver,
+ getReduceValuesForReduceSinkNoMapAgg(parseInfo, destination, reduceSinkInputRowResolver,
reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues);
// Need to pass all of the columns used in the where clauses as reduce values
Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q?rev=1236150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer2.q Thu Jan 26 11:54:07 2012
@@ -0,0 +1,19 @@
+set hive.multigroupby.singlereducer=true;
+
+CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE;
+CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1);
+
+SELECT * FROM dest_g2;
+SELECT * FROM dest_g3;
+
+DROP TABLE dest_g2;
+DROP TABLE dest_g3;
Added: hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out?rev=1236150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out Thu Jan 26 11:54:07 2012
@@ -0,0 +1,250 @@
+PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g3
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: substr(key, 1, 1)
+ type: string
+ expr: key
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: substr(key, 1, 1)
+ type: string
+ tag: -1
+ value expressions:
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g2
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g2
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+PREHOOK: Output: default@dest_g3
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT * FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1
+1 71
+2 69
+3 62
+4 74
+5 6
+6 5
+7 6
+8 8
+9 7
+PREHOOK: query: SELECT * FROM dest_g3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest_g3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g3
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1 3
+1 71 115
+2 69 111
+3 62 99
+4 74 124
+5 6 10
+6 5 6
+7 6 10
+8 8 10
+9 7 12
+PREHOOK: query: DROP TABLE dest_g2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g2
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: DROP TABLE dest_g2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g2
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE dest_g3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g3
+PREHOOK: Output: default@dest_g3
+POSTHOOK: query: DROP TABLE dest_g3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g3
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]