Posted to commits@hive.apache.org by jp...@apache.org on 2015/01/08 23:41:21 UTC

svn commit: r1650409 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java test/queries/clientpositive/union_remove_6_subq.q test/results/clientpositive/union_remove_6_subq.q.out

Author: jpullokk
Date: Thu Jan  8 22:41:21 2015
New Revision: 1650409

URL: http://svn.apache.org/r1650409
Log:
HIVE-9217: UnionProcessor misses results for multi-insert when hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/union_remove_6_subq.q
    hive/trunk/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java?rev=1650409&r1=1650408&r2=1650409&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java Thu Jan  8 22:41:21 2015
@@ -265,13 +265,20 @@ public final class UnionProcFactory {
         Operator<? extends OperatorDesc> operator =
           (Operator<? extends OperatorDesc>)stack.get(pos);
 
+        // (1) Because operator.supportUnionRemoveOptimization() returns
+        // true only for SEL and FIL operators,
+        // this rule will actually only match UNION%(SEL%|FIL%)*FS%.
+        // (2) The assumption here is that if
+        // operator.getChildOperators().size() > 1, we are going to have
+        // multiple FS operators, i.e., a multi-insert.
+        // The current implementation does not support this; for more
+        // details, see HIVE-9217.
+        if (operator.getChildOperators() != null && operator.getChildOperators().size() > 1) {
+          return null;
+        }
         // Break if a union is encountered
         if (operator instanceof UnionOperator) {
           union = (UnionOperator)operator;
-          // No need for this optimization in case of multi-table inserts
-          if (union.getChildOperators().size() > 1) {
-            return null;
-          }
           break;
         }
 

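For readers skimming the hunk above, here is a minimal, self-contained Java sketch of the behavior the new guard introduces. The names (UnionRemoveGuardSketch, Op, unionRemoveApplies) are illustrative stand-ins, not Hive's real operator classes: the point is only that the walk from the file sink back toward the union now bails out as soon as any operator on the path fans out to more than one child, since that fan-out signals a multi-insert (HIVE-9217).

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class UnionRemoveGuardSketch {

  // Stand-in for Hive's Operator hierarchy; names are illustrative only.
  static class Op {
    final String name;
    final List<Op> children = new ArrayList<>();
    Op(String name) { this.name = name; }
    Op child(Op c) { children.add(c); return this; }
    boolean isUnion() { return name.startsWith("UNION"); }
  }

  // Models the shape of the fixed loop: walk the operator stack from the
  // file sink upward; skip the optimization (return false) if any operator
  // has more than one child, succeed (return true) if a union is reached
  // without encountering a fan-out.
  static boolean unionRemoveApplies(List<Op> stack, int fsPos) {
    for (int pos = fsPos; pos >= 0; pos--) {
      Op operator = stack.get(pos);
      // The HIVE-9217 guard: a fan-out on the path implies multiple FS
      // operators, i.e., a multi-insert, which is not supported.
      if (operator.children.size() > 1) {
        return false;
      }
      if (operator.isUnion()) {
        return true;  // matched UNION%(SEL%|FIL%)*FS%
      }
    }
    return false;
  }

  public static void main(String[] args) {
    // Single insert: UNION -> SEL -> FS  => optimization applies.
    Op fs1 = new Op("FS1");
    Op sel1 = new Op("SEL1").child(fs1);
    Op union1 = new Op("UNION1").child(sel1);
    System.out.println(unionRemoveApplies(Arrays.asList(union1, sel1, fs1), 2)); // true

    // Multi-insert: UNION -> SEL -> {FSa, FSb} => guard rejects it.
    Op fsA = new Op("FSa");
    Op fsB = new Op("FSb");
    Op sel2 = new Op("SEL2").child(fsA).child(fsB);
    Op union2 = new Op("UNION2").child(sel2);
    System.out.println(unionRemoveApplies(Arrays.asList(union2, sel2, fsA), 2)); // false
  }
}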
Added: hive/trunk/ql/src/test/queries/clientpositive/union_remove_6_subq.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_6_subq.q?rev=1650409&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_6_subq.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_6_subq.q Thu Jan  8 22:41:21 2015
@@ -0,0 +1,76 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization.
+-- A union of 2 subqueries is performed (both of which are mapred queries),
+-- followed by a select star and a file sink into 2 output tables.
+-- The optimization does not take effect since this is a multi-table insert.
+-- It does not matter whether the output is merged or not; in this case,
+-- merging is turned off.
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+create table outputTbl2(key string, values bigint) stored as textfile;
+
+load data local inpath '../../data/files/T1.txt' into table inputTbl1;
+
+explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;
+select * from outputTbl2 order by key, values;
+
+-- The following queries verify the correctness of the results.
+explain
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq;
+
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq;
+
+explain
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);
+
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);
+
+

Added: hive/trunk/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union_remove_6_subq.q.out?rev=1650409&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union_remove_6_subq.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union_remove_6_subq.q.out Thu Jan  8 22:41:21 2015
@@ -0,0 +1,1242 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization.
+-- A union of 2 subqueries is performed (both of which are mapred queries),
+-- followed by a select star and a file sink into 2 output tables.
+-- The optimization does not take effect since this is a multi-table insert.
+-- It does not matter whether the output is merged or not; in this case,
+-- merging is turned off.
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization.
+-- A union of 2 subqueries is performed (both of which are mapred queries),
+-- followed by a select star and a file sink into 2 output tables.
+-- The optimization does not take effect since this is a multi-table insert.
+-- It does not matter whether the output is merged or not; in this case,
+-- merging is turned off.
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: create table outputTbl2(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl2
+POSTHOOK: query: create table outputTbl2(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2, Stage-4
+  Stage-0 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-3
+  Stage-4 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl2
+          TableScan
+            Union
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl2
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+PREHOOK: query: FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+PREHOOK: Output: default@outputtbl2
+POSTHOOK: query: FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Output: default@outputtbl2
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1	1
+1	1
+2	1
+2	1
+3	1
+3	1
+7	1
+7	1
+8	2
+8	2
+PREHOOK: query: select * from outputTbl2 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl2 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+1	1
+1	1
+2	1
+2	1
+3	1
+3	1
+7	1
+7	1
+8	2
+8	2
+PREHOOK: query: -- The following queries verify the correctness of the results.
+explain
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The following queries verify the correctness of the results.
+explain
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: (_col0 - 200) (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: avg(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>)
+          TableScan
+            Union
+              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: avg(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+400.0
+PREHOOK: query: explain
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-3 depends on stages: Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), (_col1 * 2) (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: _col0 (type: string), _col1 (type: bigint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Union
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: _col0 (type: string), _col1 (type: bigint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: bigint)
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _wcol0 (type: double)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	3.0
+0	4.5
+10	1.0
+10	1.5
+100	2.0
+100	3.0
+103	2.0
+103	3.0
+104	2.0
+104	3.0
+105	1.0
+105	1.5
+11	1.0
+11	1.5
+111	1.0
+111	1.5
+113	2.0
+113	3.0
+114	1.0
+114	1.5
+116	1.0
+116	1.5
+118	2.0
+118	3.0
+119	3.0
+119	4.5
+12	2.0
+12	3.0
+120	2.0
+120	3.0
+125	2.0
+125	3.0
+126	1.0
+126	1.5
+128	3.0
+128	4.5
+129	2.0
+129	3.0
+131	1.0
+131	1.5
+133	1.0
+133	1.5
+134	2.0
+134	3.0
+136	1.0
+136	1.5
+137	2.0
+137	3.0
+138	4.0
+138	6.0
+143	1.0
+143	1.5
+145	1.0
+145	1.5
+146	2.0
+146	3.0
+149	2.0
+149	3.0
+15	2.0
+15	3.0
+150	1.0
+150	1.5
+152	2.0
+152	3.0
+153	1.0
+153	1.5
+155	1.0
+155	1.5
+156	1.0
+156	1.5
+157	1.0
+157	1.5
+158	1.0
+158	1.5
+160	1.0
+160	1.5
+162	1.0
+162	1.5
+163	1.0
+163	1.5
+164	2.0
+164	3.0
+165	2.0
+165	3.0
+166	1.0
+166	1.5
+167	3.0
+167	4.5
+168	1.0
+168	1.5
+169	4.0
+169	6.0
+17	1.0
+17	1.5
+170	1.0
+170	1.5
+172	2.0
+172	3.0
+174	2.0
+174	3.0
+175	2.0
+175	3.0
+176	2.0
+176	3.0
+177	1.0
+177	1.5
+178	1.0
+178	1.5
+179	2.0
+179	3.0
+18	2.0
+18	3.0
+180	1.0
+180	1.5
+181	1.0
+181	1.5
+183	1.0
+183	1.5
+186	1.0
+186	1.5
+187	3.0
+187	4.5
+189	1.0
+189	1.5
+19	1.0
+19	1.5
+190	1.0
+190	1.5
+191	2.0
+191	3.0
+192	1.0
+192	1.5
+193	3.0
+193	4.5
+194	1.0
+194	1.5
+195	2.0
+195	3.0
+196	1.0
+196	1.5
+197	2.0
+197	3.0
+199	3.0
+199	4.5
+2	1.0
+2	1.5
+20	1.0
+20	1.5
+200	2.0
+200	3.0
+201	1.0
+201	1.5
+202	1.0
+202	1.5
+203	2.0
+203	3.0
+205	2.0
+205	3.0
+207	2.0
+207	3.0
+208	3.0
+208	4.5
+209	2.0
+209	3.0
+213	2.0
+213	3.0
+214	1.0
+214	1.5
+216	2.0
+216	3.0
+217	2.0
+217	3.0
+218	1.0
+218	1.5
+219	2.0
+219	3.0
+221	2.0
+221	3.0
+222	1.0
+222	1.5
+223	2.0
+223	3.0
+224	2.0
+224	3.0
+226	1.0
+226	1.5
+228	1.0
+228	1.5
+229	2.0
+229	3.0
+230	5.0
+230	7.5
+233	2.0
+233	3.0
+235	1.0
+235	1.5
+237	2.0
+237	3.0
+238	2.0
+238	3.0
+239	2.0
+239	3.0
+24	2.0
+24	3.0
+241	1.0
+241	1.5
+242	2.0
+242	3.0
+244	1.0
+244	1.5
+247	1.0
+247	1.5
+248	1.0
+248	1.5
+249	1.0
+249	1.5
+252	1.0
+252	1.5
+255	2.0
+255	3.0
+256	2.0
+256	3.0
+257	1.0
+257	1.5
+258	1.0
+258	1.5
+26	2.0
+26	3.0
+260	1.0
+260	1.5
+262	1.0
+262	1.5
+263	1.0
+263	1.5
+265	2.0
+265	3.0
+266	1.0
+266	1.5
+27	1.0
+27	1.5
+272	2.0
+272	3.0
+273	3.0
+273	4.5
+274	1.0
+274	1.5
+275	1.0
+275	1.5
+277	4.0
+277	6.0
+278	2.0
+278	3.0
+28	1.0
+28	1.5
+280	2.0
+280	3.0
+281	2.0
+281	3.0
+282	2.0
+282	3.0
+283	1.0
+283	1.5
+284	1.0
+284	1.5
+285	1.0
+285	1.5
+286	1.0
+286	1.5
+287	1.0
+287	1.5
+288	2.0
+288	3.0
+289	1.0
+289	1.5
+291	1.0
+291	1.5
+292	1.0
+292	1.5
+296	1.0
+296	1.5
+298	3.0
+298	4.5
+30	1.0
+30	1.5
+302	1.0
+302	1.5
+305	1.0
+305	1.5
+306	1.0
+306	1.5
+307	2.0
+307	3.0
+308	1.0
+308	1.5
+309	2.0
+309	3.0
+310	1.0
+310	1.5
+311	3.0
+311	4.5
+315	1.0
+315	1.5
+316	3.0
+316	4.5
+317	2.0
+317	3.0
+318	3.0
+318	4.5
+321	2.0
+321	3.0
+322	2.0
+322	3.0
+323	1.0
+323	1.5
+325	2.0
+325	3.0
+327	3.0
+327	4.5
+33	1.0
+33	1.5
+331	2.0
+331	3.0
+332	1.0
+332	1.5
+333	2.0
+333	3.0
+335	1.0
+335	1.5
+336	1.0
+336	1.5
+338	1.0
+338	1.5
+339	1.0
+339	1.5
+34	1.0
+34	1.5
+341	1.0
+341	1.5
+342	2.0
+342	3.0
+344	2.0
+344	3.0
+345	1.0
+345	1.5
+348	5.0
+348	7.5
+35	3.0
+35	4.5
+351	1.0
+351	1.5
+353	2.0
+353	3.0
+356	1.0
+356	1.5
+360	1.0
+360	1.5
+362	1.0
+362	1.5
+364	1.0
+364	1.5
+365	1.0
+365	1.5
+366	1.0
+366	1.5
+367	2.0
+367	3.0
+368	1.0
+368	1.5
+369	3.0
+369	4.5
+37	2.0
+37	3.0
+373	1.0
+373	1.5
+374	1.0
+374	1.5
+375	1.0
+375	1.5
+377	1.0
+377	1.5
+378	1.0
+378	1.5
+379	1.0
+379	1.5
+382	2.0
+382	3.0
+384	3.0
+384	4.5
+386	1.0
+386	1.5
+389	1.0
+389	1.5
+392	1.0
+392	1.5
+393	1.0
+393	1.5
+394	1.0
+394	1.5
+395	2.0
+395	3.0
+396	3.0
+396	4.5
+397	2.0
+397	3.0
+399	2.0
+399	3.0
+4	1.0
+4	1.5
+400	1.0
+400	1.5
+401	5.0
+401	7.5
+402	1.0
+402	1.5
+403	3.0
+403	4.5
+404	2.0
+404	3.0
+406	4.0
+406	6.0
+407	1.0
+407	1.5
+409	3.0
+409	4.5
+41	1.0
+41	1.5
+411	1.0
+411	1.5
+413	2.0
+413	3.0
+414	2.0
+414	3.0
+417	3.0
+417	4.5
+418	1.0
+418	1.5
+419	1.0
+419	1.5
+42	2.0
+42	3.0
+421	1.0
+421	1.5
+424	2.0
+424	3.0
+427	1.0
+427	1.5
+429	2.0
+429	3.0
+43	1.0
+43	1.5
+430	3.0
+430	4.5
+431	3.0
+431	4.5
+432	1.0
+432	1.5
+435	1.0
+435	1.5
+436	1.0
+436	1.5
+437	1.0
+437	1.5
+438	3.0
+438	4.5
+439	2.0
+439	3.0
+44	1.0
+44	1.5
+443	1.0
+443	1.5
+444	1.0
+444	1.5
+446	1.0
+446	1.5
+448	1.0
+448	1.5
+449	1.0
+449	1.5
+452	1.0
+452	1.5
+453	1.0
+453	1.5
+454	3.0
+454	4.5
+455	1.0
+455	1.5
+457	1.0
+457	1.5
+458	2.0
+458	3.0
+459	2.0
+459	3.0
+460	1.0
+460	1.5
+462	2.0
+462	3.0
+463	2.0
+463	3.0
+466	3.0
+466	4.5
+467	1.0
+467	1.5
+468	4.0
+468	6.0
+469	5.0
+469	7.5
+47	1.0
+47	1.5
+470	1.0
+470	1.5
+472	1.0
+472	1.5
+475	1.0
+475	1.5
+477	1.0
+477	1.5
+478	2.0
+478	3.0
+479	1.0
+479	1.5
+480	3.0
+480	4.5
+481	1.0
+481	1.5
+482	1.0
+482	1.5
+483	1.0
+483	1.5
+484	1.0
+484	1.5
+485	1.0
+485	1.5
+487	1.0
+487	1.5
+489	4.0
+489	6.0
+490	1.0
+490	1.5
+491	1.0
+491	1.5
+492	2.0
+492	3.0
+493	1.0
+493	1.5
+494	1.0
+494	1.5
+495	1.0
+495	1.5
+496	1.0
+496	1.5
+497	1.0
+497	1.5
+498	3.0
+498	4.5
+5	3.0
+5	4.5
+51	2.0
+51	3.0
+53	1.0
+53	1.5
+54	1.0
+54	1.5
+57	1.0
+57	1.5
+58	2.0
+58	3.0
+64	1.0
+64	1.5
+65	1.0
+65	1.5
+66	1.0
+66	1.5
+67	2.0
+67	3.0
+69	1.0
+69	1.5
+70	3.0
+70	4.5
+72	2.0
+72	3.0
+74	1.0
+74	1.5
+76	2.0
+76	3.0
+77	1.0
+77	1.5
+78	1.0
+78	1.5
+8	1.0
+8	1.5
+80	1.0
+80	1.5
+82	1.0
+82	1.5
+83	2.0
+83	3.0
+84	2.0
+84	3.0
+85	1.0
+85	1.5
+86	1.0
+86	1.5
+87	1.0
+87	1.5
+9	1.0
+9	1.5
+90	3.0
+90	4.5
+92	1.0
+92	1.5
+95	2.0
+95	3.0
+96	1.0
+96	1.5
+97	2.0
+97	3.0
+98	2.0
+98	3.0