You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2012/10/31 00:39:19 UTC
svn commit: r1403928 [3/7] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/ha...

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_10.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,366 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where one of the sub-queries requires a map-reduce
+-- job), followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The outer union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where one of the sub-queries requires a map-reduce
+-- job), followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The outer union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-2, Stage-8 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+  Stage-8 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:b-subquery2-subquery1:a-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Union
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+        null-subquery2:b-subquery2-subquery2:a-subquery2:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 2
+                    type: int
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToLong(_col1)
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                Union
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: bigint
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                            name: default.outputtbl1
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-3
+    Block level merge
+
+  Stage: Stage-5
+    Block level merge
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-8
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:b-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 1
+                    type: int
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToLong(_col1)
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+1	1
+1	1
+1	2
+2	1
+2	1
+2	2
+3	1
+3	1
+3	2
+7	1
+7	1
+7	2
+8	1
+8	1
+8	2
+8	2
+8	2

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_11.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,323 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where also contains map only sub-queries),
+-- followed by select star and a file sink.
+-- There is no need for the union optimization, since the whole query can be performed
+-- in a single map-only job
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a nested union where also contains map only sub-queries),
+-- followed by select star and a file sink.
+-- There is no need for the union optimization, since the whole query can be performed
+-- in a single map-only job
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 2 values)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 3 values))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+  Stage-2
+  Stage-4
+  Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:b-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 1
+                    type: int
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: int
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: UDFToLong(_col1)
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
+        null-subquery2:b-subquery2-subquery1:a-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 2
+                    type: int
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: int
+                  outputColumnNames: _col0, _col1
+                  Union
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: int
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: string
+                              expr: UDFToLong(_col1)
+                              type: bigint
+                        outputColumnNames: _col0, _col1
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 1
+                          table:
+                              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                              name: default.outputtbl1
+        null-subquery2:b-subquery2-subquery2:a-subquery2:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 3
+                    type: int
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: int
+                  outputColumnNames: _col0, _col1
+                  Union
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: int
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: string
+                              expr: UDFToLong(_col1)
+                              type: bigint
+                        outputColumnNames: _col0, _col1
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 1
+                          table:
+                              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                              name: default.outputtbl1
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Block level merge
+
+  Stage: Stage-4
+    Block level merge
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 as values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select * FROM (
+  SELECT key, 2 as values from inputTbl1 
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a
+)b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION []
+1	1
+1	2
+1	3
+2	1
+2	2
+2	3
+3	1
+3	2
+3	3
+7	1
+7	2
+7	3
+8	1
+8	1
+8	2
+8	2
+8	3
+8	3

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_12.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,320 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union optimization is applied, and the union is removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union optimization is applied, and the union is removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-9 is a root stage
+  Stage-7 depends on stages: Stage-2, Stage-9 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+  Stage-10 is a root stage
+  Stage-1 depends on stages: Stage-10
+  Stage-2 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-9
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:c-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 1
+                    type: int
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToString(_col1)
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: UDFToLong(_col1)
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-3
+    Block level merge
+
+  Stage: Stage-5
+    Block level merge
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        null-subquery2:c-subquery2:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        null-subquery2:c-subquery2:a 
+          TableScan
+            alias: a
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 1
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:c-subquery2:b 
+          TableScan
+            alias: b
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col5
+              Position of Big Table: 1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col5
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col5
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: UDFToLong(_col1)
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+1	1
+1	11
+2	1
+2	12
+3	1
+3	13
+7	1
+7	17
+8	1
+8	1
+8	18
+8	18
+8	28
+8	28

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_13.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,355 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a mapred query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a mapred query, and the
+-- other one is a map-join query), followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-9 is a root stage
+  Stage-7 depends on stages: Stage-2, Stage-9 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+  Stage-10 is a root stage
+  Stage-1 depends on stages: Stage-10
+  Stage-2 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-9
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:c-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: UDFToString(_col1)
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: UDFToLong(_col1)
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-3
+    Block level merge
+
+  Stage: Stage-5
+    Block level merge
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        null-subquery2:c-subquery2:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        null-subquery2:c-subquery2:a 
+          TableScan
+            alias: a
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 1
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:c-subquery2:b 
+          TableScan
+            alias: b
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col5
+              Position of Big Table: 1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col5
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col5
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: UDFToLong(_col1)
+                        type: bigint
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                        name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key
+union all
+select /*+ mapjoin(a) */ a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ]
+1	1
+1	11
+2	1
+2	12
+3	1
+3	13
+7	1
+7	17
+8	2
+8	18
+8	18
+8	28
+8	28

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_14.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,452 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a join, which should be performed as a map-join query at runtime),
+-- followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which is a map-only query, and the
+-- other one contains a join, which should be performed as a map-join query at runtime),
+-- followed by select star and a file sink.
+-- The union selectstar optimization should be performed, and the union should be removed.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-6 depends on stages: Stage-1, Stage-8, Stage-9, Stage-10 , consists of Stage-3, Stage-2, Stage-4
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2, Stage-5
+  Stage-2
+  Stage-4
+  Stage-5 depends on stages: Stage-4
+  Stage-11 is a root stage , consists of Stage-12, Stage-13, Stage-1
+  Stage-12 has a backup stage: Stage-1
+  Stage-9 depends on stages: Stage-12
+  Stage-13 has a backup stage: Stage-1
+  Stage-10 depends on stages: Stage-13
+  Stage-1
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:c-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: 1
+                    type: int
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToString(_col1)
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: UDFToLong(_col1)
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Block level merge
+
+  Stage: Stage-4
+    Block level merge
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-11
+    Conditional Operator
+
+  Stage: Stage-12
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        null-subquery2:c-subquery2:b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        null-subquery2:c-subquery2:b 
+          TableScan
+            alias: b
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+
+  Stage: Stage-9
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:c-subquery2:a 
+          TableScan
+            alias: a
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col5
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col5
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: UDFToLong(_col1)
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-13
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        null-subquery2:c-subquery2:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        null-subquery2:c-subquery2:a 
+          TableScan
+            alias: a
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 1
+
+  Stage: Stage-10
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:c-subquery2:b 
+          TableScan
+            alias: b
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col5
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col5
+                      type: string
+                outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: UDFToLong(_col1)
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                          name: default.outputtbl1
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:c-subquery2:a 
+          TableScan
+            alias: a
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+        null-subquery2:c-subquery2:b 
+          TableScan
+            alias: b
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: val
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: UDFToLong(_col1)
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                      name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, 1 as values from inputTbl1
+union all
+select a.key as key, b.val as values
+FROM inputTbl1 a join inputTbl1 b on a.key=b.key
+)c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ]
+1	1
+1	11
+2	1
+2	12
+3	1
+3	13
+7	1
+7	17
+8	1
+8	1
+8	18
+8	18
+8	28
+8	28

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_15.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,327 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This tests demonstrates that this optimization works in the presence of dynamic partitions.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- and the results are written to a table using dynamic partitions.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- This tests demonstrates that this optimization works in the presence of dynamic partitions.
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) partitioned by (ds string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values) (TOK_SELEXPR '1' ds)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values) (TOK_SELEXPR '2' ds)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1) (TOK_PARTSPEC (TOK_PARTVAL ds)))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1, Stage-2
+  Stage-2 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:a-subquery2:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+                  expr: '2'
+                  type: string
+            outputColumnNames: _col0, _col1, _col2
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:a-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+                  expr: '1'
+                  type: string
+            outputColumnNames: _col0, _col1, _col2
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: string
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1 partition (ds)
+SELECT *
+FROM (
+  SELECT key, count(1) as values, '1' as ds from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values, '2' as ds from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1@ds=1
+POSTHOOK: Output: default@outputtbl1@ds=2
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe	 
+InputFormat:        	org.apache.hadoop.hive.ql.io.RCFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.RCFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: show partitions outputTbl1
+PREHOOK: type: SHOWPARTITIONS
+POSTHOOK: query: show partitions outputTbl1
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+ds=1
+ds=2
+PREHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '1' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1	1	1
+2	1	1
+3	1	1
+7	1	1
+8	2	1
+PREHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 where ds = '2' order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=1).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1 PARTITION(ds=2).values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1	1	2
+2	1	2
+3	1	2
+7	1	2
+8	2	2