You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2012/10/31 00:39:19 UTC
svn commit: r1403928 [2/7] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/ha...

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,50 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,45 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, the order of the columns in the select list is different. So, union cannot
+-- be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which changes the order of
+-- columns being selected) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(values bigint, key string) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT a.values, a.key
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+insert overwrite table outputTbl1
+SELECT a.values, a.key
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,45 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, all the columns are not selected. So, union cannot
+-- be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which changes the order of
+-- columns being selected) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT a.key
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+insert overwrite table outputTbl1
+SELECT a.key
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,63 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, some columns are repeated. So, union cannot be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which selects columns from
+-- both the sub-qeuries of the union) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint, values2 bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values, a.values
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+insert overwrite table outputTbl1
+SELECT a.key, a.values, a.values
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+desc formatted outputTbl1;
+
+select * from outputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT a.key, concat(a.values, a.values), concat(a.values, a.values)
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+insert overwrite table outputTbl1
+SELECT a.key, concat(a.values, a.values), concat(a.values, a.values)
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,47 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely. One of the sub-queries
+-- would have multiple map-reduce jobs.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from  
+  (SELECT a.key, b.val from inputTbl1 a join inputTbl1 b on a.key=b.key) subq group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) subq2;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from  
+  (SELECT a.key, b.val from inputTbl1 a join inputTbl1 b on a.key=b.key) subq group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) subq2;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,43 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- One sub-query has a double and the other sub-query has a bigint.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key double, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM
+(
+  SELECT CAST(key AS DOUBLE) AS key, count(1) as values FROM inputTbl1 group by key
+  UNION ALL
+  SELECT CAST(key AS BIGINT) AS key, count(1) as values FROM inputTbl1 group by key
+) a;
+
+INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM
+(
+  SELECT CAST(key AS DOUBLE) AS key, count(1) as values FROM inputTbl1 group by key
+  UNION ALL
+  SELECT CAST(key AS BIGINT) AS key, count(1) as values FROM inputTbl1 group by key
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,50 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->remove->filesink optimization
+-- Union of 3 subqueries is performed (all of which are map-only queries)
+-- followed by select star and a file sink.
+-- There is no need for any optimization, since the whole query can be processed in
+-- a single map-only job
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+  UNION ALL
+  SELECT key, 3 as values from inputTbl1
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,45 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+set mapred.input.dir.recursive=true;
+set hive.merge.smallfiles.avgsize=1;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,51 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+set hive.merge.smallfiles.avgsize=1;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,41 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimiaztion does not take affect since it is a multi-table insert.
+-- It does not matter, whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+create table outputTbl2(key string, values bigint) stored as textfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;
+select * from outputTbl2 order by key, values;;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,46 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as rcfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,51 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as rcfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a;
+
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, 1 as values from inputTbl1
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q Tue Oct 30 23:39:17 2012
@@ -0,0 +1,55 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+set hive.merge.smallfiles.avgsize=1;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (one of which contains a union and is map-only),
+-- and the other one is a map-reduce query followed by select star and a file sink.
+-- There is no need for the outer union.
+-- The final file format is different from the input and intermediate file format.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- on
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as rcfile;
+
+load data local inpath '../data/files/T1.txt' into table inputTbl1;
+
+explain
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key 
+union all
+select * FROM (
+  SELECT key, 1 as values from inputTbl1 
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a
+)b;
+
+insert overwrite table outputTbl1
+SELECT * FROM
+(
+select key, count(1) as values from inputTbl1 group by key 
+union all
+select * FROM (
+  SELECT key, 1 as values from inputTbl1 
+  UNION ALL
+  SELECT key, 2 as values from inputTbl1
+) a
+)b;
+
+desc formatted outputTbl1;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;

Added: hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,875 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization
+-- Union of 2 map-reduce subqueries is performed for the skew join
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output, it might be easier to run the test
+-- only on hadoop 23
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization
+-- Union of 2 map-reduce subqueries is performed for the skew join
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output, it might be easier to run the test
+-- only on hadoop 23
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- a simple join query with skew on both the tables on the join key
+
+EXPLAIN
+SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- a simple join query with skew on both the tables on the join key
+
+EXPLAIN
+SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-3 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subquery1:a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        subquery1:b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-3 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subquery1:a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        subquery1:b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+NULL	NULL	4	14
+NULL	NULL	5	15
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: create table DEST1(key1 STRING, val1 STRING, key2 STRING, val2 STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table DEST1(key1 STRING, val1 STRING, key2 STRING, val2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@DEST1
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subquery1:a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        subquery1:b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@dest1
+POSTHOOK: query: INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM DEST1
+ORDER BY key1, key2, val1, val2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM DEST1
+ORDER BY key1, key2, val1, val2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subquery1:a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        subquery1:b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (not ((key = '2') or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key = '2') or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.dest1
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@dest1
+POSTHOOK: query: INSERT OVERWRITE TABLE DEST1
+SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM DEST1
+ORDER BY key1, key2, val1, val2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM DEST1
+ORDER BY key1, key2, val1, val2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ]
+NULL	NULL	4	14
+NULL	NULL	5	15
+2	12	2	22
+3	13	3	13
+8	18	8	18
+8	18	8	18
+8	28	8	18
+8	28	8	18

Added: hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,290 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t2
+PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t3
+PREHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization
+-- Union of 3 map-reduce subqueries is performed for the skew join
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table, it might be easier
+-- to run the test only on hadoop 23
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization
+-- Union of 3 map-reduce subqueries is performed for the skew join
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table, it might be easier
+-- to run the test only on hadoop 23
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1) a) (TOK_TABREF (TOK_TABNAME T2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME T3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b))) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME c))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subquery1:a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (not (((key = '2') or (key = '8')) or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        subquery1:b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (not (((key = '2') or (key = '8')) or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        subquery1:c 
+          TableScan
+            alias: c
+            Filter Operator
+              predicate:
+                  expr: (not (((key = '2') or (key = '8')) or (key = '3')))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 2
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 0 to 2
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+            2 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+                  expr: _col8
+                  type: string
+                  expr: _col9
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (((key = '2') or (key = '8')) or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (((key = '2') or (key = '8')) or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+        c 
+          TableScan
+            alias: c
+            Filter Operator
+              predicate:
+                  expr: (((key = '2') or (key = '8')) or (key = '3'))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 2
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 0 to 2
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+            2 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+                  expr: _col8
+                  type: string
+                  expr: _col9
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Select Operator
+              SELECT * : (no compute)
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+2	12	2	22	2	12

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_1.q.out?rev=1403928&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_1.q.out Tue Oct 30 23:39:17 2012
@@ -0,0 +1,277 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them 
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1, Stage-2
+  Stage-2 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:a-subquery2:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:a-subquery1:inputtbl1 
+          TableScan
+            alias: inputtbl1
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+values              	bigint              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+1	1
+1	1
+2	1
+2	1
+3	1
+3	1
+7	1
+7	1
+8	2
+8	2