You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2012/10/31 00:39:19 UTC
svn commit: r1403928 [6/7] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/ha...
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_3.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_3.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_3.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,267 @@
+PREHOOK: query: -- This is to test the union->remove->filesink optimization
+-- Union of 3 subqueries is performed (all of which are map-only queries)
+-- followed by select star and a file sink.
+-- There is no need for any optimization, since the whole query can be processed in
+-- a single map-only job
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->remove->filesink optimization
+-- Union of 3 subqueries is performed (all of which are map-only queries)
+-- followed by select star and a file sink.
+-- There is no need for any optimization, since the whole query can be processed in
+-- a single map-only job
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 3 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1-subquery1:a-subquery1-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ null-subquery1-subquery2:a-subquery1-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 2
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 3
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+1 1
+1 2
+1 3
+2 1
+2 2
+2 3
+3 1
+3 2
+3 3
+7 1
+7 2
+7 3
+8 1
+8 1
+8 2
+8 2
+8 3
+8 3
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_4.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_4.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_4.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,323 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1, Stage-7 , consists of Stage-3, Stage-2, Stage-4
+ Stage-3
+ Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+ Stage-2
+ Stage-4
+ Stage-5 depends on stages: Stage-4
+ Stage-7 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-3
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:a-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+2 1
+2 1
+3 1
+3 1
+7 1
+7 1
+8 2
+8 2
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_5.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_5.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_5.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,350 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1, Stage-7, Stage-8 , consists of Stage-3, Stage-2, Stage-4
+ Stage-3
+ Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+ Stage-2
+ Stage-4
+ Stage-5 depends on stages: Stage-4
+ Stage-7 is a root stage
+ Stage-8 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 2
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-3
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1-subquery1:a-subquery1-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1-subquery2:a-subquery1-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+1 2
+2 1
+2 1
+2 2
+3 1
+3 1
+3 2
+7 1
+7 1
+7 2
+8 1
+8 1
+8 2
+8 2
+8 2
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_6.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_6.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_6.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_6.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,337 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimiaztion does not take affect since it is a multi-table insert.
+-- It does not matter, whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimiaztion does not take affect since it is a multi-table insert.
+-- It does not matter, whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: create table outputTbl2(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl2(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl2
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2, Stage-4
+ Stage-0 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-3
+ Stage-4 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
+#### A masked pattern was here ####
+ TableScan
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:a-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+
+PREHOOK: query: FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+PREHOOK: Output: default@outputtbl2
+POSTHOOK: query: FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Output: default@outputtbl2
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+2 1
+2 1
+3 1
+3 1
+7 1
+7 1
+8 2
+8 2
+PREHOOK: query: select * from outputTbl2 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl2 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+2 1
+2 1
+3 1
+3 1
+7 1
+7 1
+8 2
+8 2
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_7.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_7.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_7.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,281 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1, Stage-2
+ Stage-2 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:a-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+2 1
+2 1
+3 1
+3 1
+7 1
+7 1
+8 2
+8 2
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_8.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_8.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_8.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_8.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,306 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1, Stage-2, Stage-3
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 2
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1-subquery1:a-subquery1-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1-subquery2:a-subquery1-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+1 2
+2 1
+2 1
+2 2
+3 1
+3 1
+3 2
+7 1
+7 1
+7 2
+8 1
+8 1
+8 2
+8 2
+8 2