You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2017/02/03 21:50:25 UTC
[12/51] [partial] hive git commit: HIVE-15790: Remove unused beeline
golden files (Gunther Hagleitner, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/3890ed65/ql/src/test/results/beelinepositive/groupby_sort_skew_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/beelinepositive/groupby_sort_skew_1.q.out b/ql/src/test/results/beelinepositive/groupby_sort_skew_1.q.out
deleted file mode 100644
index 766f127..0000000
--- a/ql/src/test/results/beelinepositive/groupby_sort_skew_1.q.out
+++ /dev/null
@@ -1,4891 +0,0 @@
-Saving all output to "!!{outputDirectory}!!/groupby_sort_skew_1.q.raw". Enter "record" with no arguments to stop it.
->>> !run !!{qFileDirectory}!!/groupby_sort_skew_1.q
->>> set hive.enforce.bucketing = true;
-No rows affected
->>> set hive.enforce.sorting = true;
-No rows affected
->>> set hive.exec.reducers.max = 10;
-No rows affected
->>> set hive.map.groupby.sorted=true;
-No rows affected
->>> set hive.groupby.skewindata=true;
-No rows affected
->>>
->>> CREATE TABLE T1(key STRING, val STRING)
-CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-No rows affected
->>>
->>> LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
-No rows affected
->>>
->>> -- perform an insert to make sure there are 2 files
->>> INSERT OVERWRITE TABLE T1 select key, val from T1;
-'key','val'
-No rows selected
->>>
->>> CREATE TABLE outputTbl1(key int, cnt int);
-No rows affected
->>>
->>> -- The plan should be converted to a map-side group by if the group by key
->>> -- matches the skewed key
->>> -- addind a order by at the end to make the test results deterministic
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl1
-SELECT key, count(1) FROM T1 GROUP BY key;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-0 depends on stages: Stage-1'
-' Stage-2 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: key'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: key'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' outputColumnNames: _col0, _col1'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-2'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-154 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl1
-SELECT key, count(1) FROM T1 GROUP BY key;
-'_col0','_col1'
-No rows selected
->>>
->>> SELECT * FROM outputTbl1 ORDER BY key;
-'key','cnt'
-'1','1'
-'2','1'
-'3','1'
-'7','1'
-'8','2'
-5 rows selected
->>>
->>> CREATE TABLE outputTbl2(key1 int, key2 string, cnt int);
-No rows affected
->>>
->>> -- no map-side group by even if the group by key is a superset of skewed key
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl2
-SELECT key, val, count(1) FROM T1 GROUP BY key, val;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-2 depends on stages: Stage-1'
-' Stage-0 depends on stages: Stage-2'
-' Stage-3 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' expr: val'
-' type: string'
-' outputColumnNames: key, val'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: key'
-' type: string'
-' expr: val'
-' type: string'
-' mode: hash'
-' outputColumnNames: _col0, _col1, _col2'
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: string'
-' sort order: ++'
-' Map-reduce partition columns:'
-' expr: rand()'
-' type: double'
-' tag: -1'
-' value expressions:'
-' expr: _col2'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: count(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: string'
-' expr: KEY._col1'
-' type: string'
-' mode: partials'
-' outputColumnNames: _col0, _col1, _col2'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 0'
-' directory: file:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' table:'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2'
-' columns.types string,string,bigint'
-' escape.delim \'
-' TotalFiles: 1'
-' GatherStats: false'
-' MultiFileSpray: false'
-''
-' Stage: Stage-2'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' file:!!{hive.exec.scratchdir}!! '
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: string'
-' sort order: ++'
-' Map-reduce partition columns:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: string'
-' tag: -1'
-' value expressions:'
-' expr: _col2'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]'
-' Path -> Partition:'
-' file:!!{hive.exec.scratchdir}!! '
-' Partition'
-' base file name: -mr-10002'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2'
-' columns.types string,string,bigint'
-' escape.delim \'
-' '
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2'
-' columns.types string,string,bigint'
-' escape.delim \'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: count(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: string'
-' expr: KEY._col1'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1, _col2'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: string'
-' expr: _col2'
-' type: bigint'
-' outputColumnNames: _col0, _col1, _col2'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: _col1'
-' type: string'
-' expr: UDFToInteger(_col2)'
-' type: int'
-' outputColumnNames: _col0, _col1, _col2'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,cnt'
-' columns.types int:string:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl2'
-' name groupby_sort_skew_1.outputtbl2'
-' serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl2'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,cnt'
-' columns.types int:string:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl2'
-' name groupby_sort_skew_1.outputtbl2'
-' serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl2'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-3'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-257 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl2
-SELECT key, val, count(1) FROM T1 GROUP BY key, val;
-'_col0','_col1','_col2'
-No rows selected
->>>
->>> SELECT * FROM outputTbl2 ORDER BY key1, key2;
-'key1','key2','cnt'
-'1','11','1'
-'2','12','1'
-'3','13','1'
-'7','17','1'
-'8','18','1'
-'8','28','1'
-6 rows selected
->>>
->>> -- It should work for sub-queries
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl1
-SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-0 depends on stages: Stage-1'
-' Stage-2 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' subq1:t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: _col0'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' outputColumnNames: _col0'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: _col0'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' outputColumnNames: _col0, _col1'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 15'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 20'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [subq1:t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 15'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 20'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-2'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-169 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl1
-SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key;
-'_col0','_col1'
-No rows selected
->>>
->>> SELECT * FROM outputTbl1 ORDER BY key;
-'key','cnt'
-'1','1'
-'2','1'
-'3','1'
-'7','1'
-'8','2'
-5 rows selected
->>>
->>> -- It should work for sub-queries with column aliases
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl1
-SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL k))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-0 depends on stages: Stage-1'
-' Stage-2 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' subq1:t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: _col0'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' outputColumnNames: _col0'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: _col0'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' outputColumnNames: _col0, _col1'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 15'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 20'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [subq1:t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 15'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 20'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-2'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-169 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl1
-SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k;
-'_col0','_col1'
-No rows selected
->>>
->>> SELECT * FROM outputTbl1 ORDER BY key;
-'key','cnt'
-'1','1'
-'2','1'
-'3','1'
-'7','1'
-'8','2'
-5 rows selected
->>>
->>> CREATE TABLE outputTbl3(key1 int, key2 int, cnt int);
-No rows affected
->>>
->>> -- The plan should be converted to a map-side group by if the group by key contains a constant followed
->>> -- by a match to the skewed key
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl3
-SELECT 1, key, count(1) FROM T1 GROUP BY 1, key;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY 1 (TOK_TABLE_OR_COL key))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-0 depends on stages: Stage-1'
-' Stage-2 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: key'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: 1'
-' type: int'
-' expr: key'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1, _col2'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: int'
-' expr: _col1'
-' type: string'
-' expr: _col2'
-' type: bigint'
-' outputColumnNames: _col0, _col1, _col2'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' expr: UDFToInteger(_col2)'
-' type: int'
-' outputColumnNames: _col0, _col1, _col2'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,cnt'
-' columns.types int:int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl3'
-' name groupby_sort_skew_1.outputtbl3'
-' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl3'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,cnt'
-' columns.types int:int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl3'
-' name groupby_sort_skew_1.outputtbl3'
-' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl3'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-2'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-160 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl3
-SELECT 1, key, count(1) FROM T1 GROUP BY 1, key;
-'_col0','_col1','_col2'
-No rows selected
->>>
->>> SELECT * FROM outputTbl3 ORDER BY key1, key2;
-'key1','key2','cnt'
-'1','1','1'
-'1','2','1'
-'1','3','1'
-'1','7','1'
-'1','8','2'
-5 rows selected
->>>
->>> CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int);
-No rows affected
->>>
->>> -- no map-side group by if the group by key contains a constant followed by another column
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-2 depends on stages: Stage-1'
-' Stage-0 depends on stages: Stage-2'
-' Stage-3 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' expr: val'
-' type: string'
-' outputColumnNames: key, val'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: key'
-' type: string'
-' expr: 1'
-' type: int'
-' expr: val'
-' type: string'
-' mode: hash'
-' outputColumnNames: _col0, _col1, _col2, _col3'
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: int'
-' expr: _col2'
-' type: string'
-' sort order: +++'
-' Map-reduce partition columns:'
-' expr: rand()'
-' type: double'
-' tag: -1'
-' value expressions:'
-' expr: _col3'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: count(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: string'
-' expr: KEY._col1'
-' type: int'
-' expr: KEY._col2'
-' type: string'
-' mode: partials'
-' outputColumnNames: _col0, _col1, _col2, _col3'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 0'
-' directory: file:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' table:'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2,_col3'
-' columns.types string,int,string,bigint'
-' escape.delim \'
-' TotalFiles: 1'
-' GatherStats: false'
-' MultiFileSpray: false'
-''
-' Stage: Stage-2'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' file:!!{hive.exec.scratchdir}!! '
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: int'
-' expr: _col2'
-' type: string'
-' sort order: +++'
-' Map-reduce partition columns:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: int'
-' expr: _col2'
-' type: string'
-' tag: -1'
-' value expressions:'
-' expr: _col3'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]'
-' Path -> Partition:'
-' file:!!{hive.exec.scratchdir}!! '
-' Partition'
-' base file name: -mr-10002'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2,_col3'
-' columns.types string,int,string,bigint'
-' escape.delim \'
-' '
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2,_col3'
-' columns.types string,int,string,bigint'
-' escape.delim \'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: count(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: string'
-' expr: KEY._col1'
-' type: int'
-' expr: KEY._col2'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1, _col2, _col3'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: int'
-' expr: _col2'
-' type: string'
-' expr: _col3'
-' type: bigint'
-' outputColumnNames: _col0, _col1, _col2, _col3'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: _col1'
-' type: int'
-' expr: _col2'
-' type: string'
-' expr: UDFToInteger(_col3)'
-' type: int'
-' outputColumnNames: _col0, _col1, _col2, _col3'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,key3,cnt'
-' columns.types int:int:string:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl4'
-' name groupby_sort_skew_1.outputtbl4'
-' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl4'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,key3,cnt'
-' columns.types int:int:string:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl4'
-' name groupby_sort_skew_1.outputtbl4'
-' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl4'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-3'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-273 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val;
-'_col0','_col1','_col2','_col3'
-No rows selected
->>>
->>> SELECT * FROM outputTbl4 ORDER BY key1, key2, key3;
-'key1','key2','key3','cnt'
-'1','1','11','1'
-'2','1','12','1'
-'3','1','13','1'
-'7','1','17','1'
-'8','1','18','1'
-'8','1','28','1'
-6 rows selected
->>>
->>> -- no map-side group by if the group by key contains a function
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl3
-SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) 1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (+ (TOK_TABLE_OR_COL key) 1))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-2 depends on stages: Stage-1'
-' Stage-0 depends on stages: Stage-2'
-' Stage-3 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: key'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: key'
-' type: string'
-' expr: (key + 1)'
-' type: double'
-' mode: hash'
-' outputColumnNames: _col0, _col1, _col2'
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: double'
-' sort order: ++'
-' Map-reduce partition columns:'
-' expr: rand()'
-' type: double'
-' tag: -1'
-' value expressions:'
-' expr: _col2'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: count(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: string'
-' expr: KEY._col1'
-' type: double'
-' mode: partials'
-' outputColumnNames: _col0, _col1, _col2'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 0'
-' directory: file:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' table:'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2'
-' columns.types string,double,bigint'
-' escape.delim \'
-' TotalFiles: 1'
-' GatherStats: false'
-' MultiFileSpray: false'
-''
-' Stage: Stage-2'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' file:!!{hive.exec.scratchdir}!! '
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: double'
-' sort order: ++'
-' Map-reduce partition columns:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: double'
-' tag: -1'
-' value expressions:'
-' expr: _col2'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]'
-' Path -> Partition:'
-' file:!!{hive.exec.scratchdir}!! '
-' Partition'
-' base file name: -mr-10002'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2'
-' columns.types string,double,bigint'
-' escape.delim \'
-' '
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1,_col2'
-' columns.types string,double,bigint'
-' escape.delim \'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: count(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: string'
-' expr: KEY._col1'
-' type: double'
-' mode: final'
-' outputColumnNames: _col0, _col1, _col2'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: double'
-' expr: _col2'
-' type: bigint'
-' outputColumnNames: _col0, _col1, _col2'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' expr: UDFToInteger(_col2)'
-' type: int'
-' outputColumnNames: _col0, _col1, _col2'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,cnt'
-' columns.types int:int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl3'
-' name groupby_sort_skew_1.outputtbl3'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 25'
-' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl3'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key1,key2,cnt'
-' columns.types int:int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl3'
-' name groupby_sort_skew_1.outputtbl3'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 25'
-' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl3'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-3'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-265 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl3
-SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1;
-'_col0','_col1','_col2'
-No rows selected
->>>
->>> SELECT * FROM outputTbl3 ORDER BY key1, key2;
-'key1','key2','cnt'
-'1','2','1'
-'2','3','1'
-'3','4','1'
-'7','8','1'
-'8','9','2'
-5 rows selected
->>>
->>> -- it should not matter what follows the group by
->>> -- test various cases
->>>
->>> -- group by followed by another group by
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
-(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
-group by key + key;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL cnt)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-2 depends on stages: Stage-1'
-' Stage-0 depends on stages: Stage-2'
-' Stage-3 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' subq1:t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: key'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: key'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Group By Operator'
-' aggregations:'
-' expr: sum(_col1)'
-' bucketGroup: false'
-' keys:'
-' expr: (_col0 + _col0)'
-' type: double'
-' mode: hash'
-' outputColumnNames: _col0, _col1'
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: double'
-' sort order: +'
-' Map-reduce partition columns:'
-' expr: rand()'
-' type: double'
-' tag: -1'
-' value expressions:'
-' expr: _col1'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 [subq1:t1]'
-' Path -> Partition:'
-' !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1 '
-' Partition'
-' base file name: t1'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' '
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' SORTBUCKETCOLSPREFIX TRUE'
-' bucket_count 2'
-' bucket_field_name key'
-' columns key,val'
-' columns.types string:string'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/t1'
-' name groupby_sort_skew_1.t1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 6'
-' rawDataSize 24'
-' serialization.ddl struct t1 { string key, string val}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 30'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.t1'
-' name: groupby_sort_skew_1.t1'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: sum(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: double'
-' mode: partials'
-' outputColumnNames: _col0, _col1'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 0'
-' directory: file:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' table:'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1'
-' columns.types double,bigint'
-' escape.delim \'
-' TotalFiles: 1'
-' GatherStats: false'
-' MultiFileSpray: false'
-''
-' Stage: Stage-2'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' file:!!{hive.exec.scratchdir}!! '
-' Reduce Output Operator'
-' key expressions:'
-' expr: _col0'
-' type: double'
-' sort order: +'
-' Map-reduce partition columns:'
-' expr: _col0'
-' type: double'
-' tag: -1'
-' value expressions:'
-' expr: _col1'
-' type: bigint'
-' Needs Tagging: false'
-' Path -> Alias:'
-' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]'
-' Path -> Partition:'
-' file:!!{hive.exec.scratchdir}!! '
-' Partition'
-' base file name: -mr-10002'
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1'
-' columns.types double,bigint'
-' escape.delim \'
-' '
-' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-' properties:'
-' columns _col0,_col1'
-' columns.types double,bigint'
-' escape.delim \'
-' Reduce Operator Tree:'
-' Group By Operator'
-' aggregations:'
-' expr: sum(VALUE._col0)'
-' bucketGroup: false'
-' keys:'
-' expr: KEY._col0'
-' type: double'
-' mode: final'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: double'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' outputColumnNames: _col0, _col1'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 15'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 20'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' TotalFiles: 1'
-' GatherStats: true'
-' MultiFileSpray: false'
-''
-' Stage: Stage-0'
-' Move Operator'
-' tables:'
-' replace: true'
-' source: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort_skew_1.outputtbl1'
-' numFiles 1'
-' numPartitions 0'
-' numRows 5'
-' rawDataSize 15'
-' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}'
-' serialization.format 1'
-' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' totalSize 20'
-' transient_lastDdlTime !!UNIXTIME!!'
-' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-' name: groupby_sort_skew_1.outputtbl1'
-' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
-''
-' Stage: Stage-3'
-' Stats-Aggr Operator'
-' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-''
-''
-272 rows selected
->>>
->>> INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
-(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
-group by key + key;
-'_col0','_col1'
-No rows selected
->>>
->>> SELECT * FROM outputTbl1 ORDER BY key;
-'key','cnt'
-'2','1'
-'4','1'
-'6','1'
-'14','1'
-'16','2'
-5 rows selected
->>>
->>> -- group by followed by a union
->>> EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl1
-SELECT * FROM (
-SELECT key, count(1) FROM T1 GROUP BY key
-UNION ALL
-SELECT key, count(1) FROM T1 GROUP BY key
-) subq1;
-'Explain'
-'ABSTRACT SYNTAX TREE:'
-' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))'
-''
-'STAGE DEPENDENCIES:'
-' Stage-1 is a root stage'
-' Stage-0 depends on stages: Stage-1'
-' Stage-2 depends on stages: Stage-0'
-''
-'STAGE PLANS:'
-' Stage: Stage-1'
-' Map Reduce'
-' Alias -> Map Operator Tree:'
-' null-subquery1:subq1-subquery1:t1 '
-' TableScan'
-' alias: t1'
-' GatherStats: false'
-' Select Operator'
-' expressions:'
-' expr: key'
-' type: string'
-' outputColumnNames: key'
-' Group By Operator'
-' aggregations:'
-' expr: count(1)'
-' bucketGroup: false'
-' keys:'
-' expr: key'
-' type: string'
-' mode: final'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Union'
-' Select Operator'
-' expressions:'
-' expr: _col0'
-' type: string'
-' expr: _col1'
-' type: bigint'
-' outputColumnNames: _col0, _col1'
-' Select Operator'
-' expressions:'
-' expr: UDFToInteger(_col0)'
-' type: int'
-' expr: UDFToInteger(_col1)'
-' type: int'
-' outputColumnNames: _col0, _col1'
-' File Output Operator'
-' compressed: false'
-' GlobalTableId: 1'
-' directory: pfile:!!{hive.exec.scratchdir}!!'
-' NumFilesPerFileSink: 1'
-' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
-' table:'
-' input format: org.apache.hadoop.mapred.TextInputFormat'
-' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' properties:'
-' bucket_count -1'
-' columns key,cnt'
-' columns.types int:int'
-' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_skew_1.db/outputtbl1'
-' name groupby_sort
<TRUNCATED>