You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/10/12 07:27:37 UTC
[24/37] hive git commit: HIVE-14877: Move slow CliDriver tests to
MiniLlap (Prasanth Jayachandran reviewed by Siddharth Seth)
http://git-wip-us.apache.org/repos/asf/hive/blob/1f258e96/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out
new file mode 100644
index 0000000..c1b04b8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/groupby_sort_skew_1_23.q.out
@@ -0,0 +1,4482 @@
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S)
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: -- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 select key, val from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE outputTbl1(key int, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key
+-- matches the sorted key
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key
+-- matches the sorted key
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM T1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl2
+POSTHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl2
+PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: bigint)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: bigint)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,cnt
+ columns.comments
+ columns.types int:string:int
+#### A masked pattern was here ####
+ name default.outputtbl2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,cnt
+ columns.comments
+ columns.types int:string:int
+#### A masked pattern was here ####
+ name default.outputtbl2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl2
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2
+SELECT key, val, count(1) FROM T1 GROUP BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl2
+POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+1 11 1
+2 12 1
+3 13 1
+7 17 1
+8 18 1
+8 28 1
+PREHOOK: query: -- It should work for sub-queries
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- It should work for sub-queries
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 15
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 20
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 15
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 20
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: -- It should work for sub-queries with column aliases
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
+PREHOOK: type: QUERY
+POSTHOOK: query: -- It should work for sub-queries with column aliases
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 15
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 20
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 15
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 20
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl3
+POSTHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl3
+PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed
+-- by a match to the sorted key
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl3
+SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed
+-- by a match to the sorted key
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl3
+SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(key)
+ keys: key (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int), UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,cnt
+ columns.comments
+ columns.types int:int:int
+#### A masked pattern was here ####
+ name default.outputtbl3
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl3
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,cnt
+ columns.comments
+ columns.types int:int:int
+#### A masked pattern was here ####
+ name default.outputtbl3
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl3
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
+SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl3
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3
+SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl3
+POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl3.key1 SIMPLE []
+POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl3
+#### A masked pattern was here ####
+1 1 1
+1 2 1
+1 3 1
+1 7 1
+1 8 2
+PREHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl4
+POSTHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl4
+PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: key, val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(val)
+ keys: key (type: string), val (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: bigint)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: bigint)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), 1 (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,key3,cnt
+ columns.comments
+ columns.types int:int:string:int
+#### A masked pattern was here ####
+ name default.outputtbl4
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl4
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,key3,cnt
+ columns.comments
+ columns.types int:int:string:int
+#### A masked pattern was here ####
+ name default.outputtbl4
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl4
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl4
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl4
+POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
+POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+1 1 11 1
+2 1 12 1
+3 1 13 1
+7 1 17 1
+8 1 18 1
+8 1 28 1
+PREHOOK: query: -- no map-side group by if the group by key contains a function
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl3
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no map-side group by if the group by key contains a function
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl3
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string), _col1 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: double)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: bigint)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: double)
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: double)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: bigint)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: double)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,cnt
+ columns.comments
+ columns.types int:int:int
+#### A masked pattern was here ####
+ name default.outputtbl3
+ numFiles 1
+ numRows 5
+ rawDataSize 25
+ serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl3
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key1,key2,cnt
+ columns.comments
+ columns.types int:int:int
+#### A masked pattern was here ####
+ name default.outputtbl3
+ numFiles 1
+ numRows 5
+ rawDataSize 25
+ serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl3
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl3
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl3
+POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl3
+#### A masked pattern was here ####
+1 2 1
+2 3 1
+3 4 1
+7 8 1
+8 9 2
+PREHOOK: query: -- it should not matter what follows the group by
+-- test various cases
+
+-- group by followed by another group by
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key + key, sum(cnt) from
+(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
+group by key + key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- it should not matter what follows the group by
+-- test various cases
+
+-- group by followed by another group by
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT key + key, sum(cnt) from
+(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
+group by key + key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (UDFToDouble(_col0) + UDFToDouble(_col0)) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: double)
+ mode: partials
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: double)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 15
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 20
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 15
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 20
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key + key, sum(cnt) from
+(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
+group by key + key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT key + key, sum(cnt) from
+(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
+group by key + key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+14 1
+16 2
+2 1
+4 1
+6 1
+PREHOOK: query: -- group by followed by a union
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM (
+SELECT key, count(1) FROM T1 GROUP BY key
+ UNION ALL
+SELECT key, count(1) FROM T1 GROUP BY key
+) subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- group by followed by a union
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM (
+SELECT key, count(1) FROM T1 GROUP BY key
+ UNION ALL
+SELECT key, count(1) FROM T1 GROUP BY key
+) subq1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 17
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 22
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 5
+ rawDataSize 17
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 22
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,val
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.t1
+ numFiles 1
+ numRows 6
+ rawDataSize 24
+ serialization.ddl struct t1 { string key, string val}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 30
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+
<TRUNCATED>