You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2012/10/31 00:39:19 UTC
svn commit: r1403928 [3/7] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/ha...
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,366 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where one of the sub-queries requires a map-reduce
+-- job), followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The outer union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where one of the sub-queries requires a map-reduce
+-- job), followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The outer union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-2, Stage-8 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+ Stage-8 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:b-subquery2-subquery1:a-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ null-subquery2:b-subquery2-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 2
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Block level merge
+
+ Stage: Stage-5
+ Block level merge
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:b-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+1 1
+1 1
+1 2
+2 1
+2 1
+2 2
+3 1
+3 1
+3 2
+7 1
+7 1
+7 2
+8 1
+8 1
+8 2
+8 2
+8 2
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,323 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where also contains map only sub-queries),
+-- followed by select star and a file sink.
+-- There is no need for the union optimization, since the whole query can be performed
+-- in a single map-only job
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where also contains map only sub-queries),
+-- followed by select star and a file sink.
+-- There is no need for the union optimization, since the whole query can be performed
+-- in a single map-only job
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, 2 values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, 2 values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 3 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+ Stage-3
+ Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+ Stage-2
+ Stage-4
+ Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:b-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ null-subquery2:b-subquery2-subquery1:a-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 2
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ null-subquery2:b-subquery2-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 3
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-3
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Block level merge
+
+ Stage: Stage-4
+ Block level merge
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, 2 as values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+ SELECT key, 2 as values from inputTbl1
+ UNION ALL
+ SELECT key, 3 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+1 1
+1 2
+1 3
+2 1
+2 2
+2 3
+3 1
+3 2
+3 3
+7 1
+7 2
+7 3
+8 1
+8 1
+8 2
+8 2
+8 3
+8 3
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,320 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union optimization is applied, and the union is removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union optimization is applied, and the union is removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-9 is a root stage
+ Stage-7 depends on stages: Stage-2, Stage-9 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+ Stage-10 is a root stage
+ Stage-1 depends on stages: Stage-10
+ Stage-2 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:c-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToString(_col1)
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Block level merge
+
+ Stage: Stage-5
+ Block level merge
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ null-subquery2:c-subquery2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ null-subquery2:c-subquery2:a
+ TableScan
+ alias: a
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:c-subquery2:b
+ TableScan
+ alias: b
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col5
+ Position of Big Table: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+1 1
+1 11
+2 1
+2 12
+3 1
+3 13
+7 1
+7 17
+8 1
+8 1
+8 18
+8 18
+8 28
+8 28
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,355 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a mapred query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a mapred query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-9 is a root stage
+ Stage-7 depends on stages: Stage-2, Stage-9 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+ Stage-10 is a root stage
+ Stage-1 depends on stages: Stage-10
+ Stage-2 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:c-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToString(_col1)
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Block level merge
+
+ Stage: Stage-5
+ Block level merge
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ null-subquery2:c-subquery2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ null-subquery2:c-subquery2:a
+ TableScan
+ alias: a
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:c-subquery2:b
+ TableScan
+ alias: b
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col5
+ Position of Big Table: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ]
+1 1
+1 11
+2 1
+2 12
+3 1
+3 13
+7 1
+7 17
+8 2
+8 18
+8 18
+8 28
+8 28
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,452 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a join, which should be performed as a map-join query at runtime),
+-- followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a join, which should be performed as a map-join query at runtime),
+-- followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-6 depends on stages: Stage-1, Stage-8, Stage-9, Stage-10 , consists of Stage-3, Stage-2, Stage-4
+ Stage-3
+ Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+ Stage-2
+ Stage-4
+ Stage-5 depends on stages: Stage-4
+ Stage-11 is a root stage , consists of Stage-12, Stage-13, Stage-1
+ Stage-12 has a backup stage: Stage-1
+ Stage-9 depends on stages: Stage-12
+ Stage-13 has a backup stage: Stage-1
+ Stage-10 depends on stages: Stage-13
+ Stage-1
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:c-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToString(_col1)
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-3
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Block level merge
+
+ Stage: Stage-4
+ Block level merge
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-11
+ Conditional Operator
+
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ null-subquery2:c-subquery2:b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ null-subquery2:c-subquery2:b
+ TableScan
+ alias: b
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:c-subquery2:a
+ TableScan
+ alias: a
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col5
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-13
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ null-subquery2:c-subquery2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ null-subquery2:c-subquery2:a
+ TableScan
+ alias: a
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-10
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:c-subquery2:b
+ TableScan
+ alias: b
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {val}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col5
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:c-subquery2:a
+ TableScan
+ alias: a
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ null-subquery2:c-subquery2:b
+ TableScan
+ alias: b
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: val
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToLong(_col1)
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+1 1
+1 11
+2 1
+2 12
+3 1
+3 13
+7 1
+7 17
+8 1
+8 1
+8 18
+8 18
+8 28
+8 28
Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,327 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This tests demonstrates that this optimization works in the presence of dynamic partitions.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This tests demonstrates that this optimization works in the presence of dynamic partitions.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+ SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+ SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values) (TOK_SELEXPR '1' ds)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values) (TOK_SELEXPR '2' ds)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1) (TOK_PARTSPEC (TOK_PARTVAL ds)))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1, Stage-2
+ Stage-2 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: '2'
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:a-subquery1:inputtbl1
+ TableScan
+ alias: inputtbl1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: '1'
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+ SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+ SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1@ds=1
+POSTHOOK: Output: default@outputtbl1@ds=2
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+# col_name data_type comment
+
+key string None
+values bigint None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: show partitions outputTbl1
+PREHOOK: type: SHOWPARTITIONS
+POSTHOOK: query: show partitions outputTbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+ds=1
+ds=2
+PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1 1 1
+2 1 1
+3 1 1
+7 1 1
+8 2 1
+PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1 1 2
+2 1 2
+3 1 2
+7 1 2
+8 2 2