You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/12/10 20:36:29 UTC
[1/3] hive git commit: HIVE-11531: Add mysql-style LIMIT support to
Hive, or improve ROW_NUMBER performance-wise (Hui Zheng,
reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master 57f39a990 -> e7abf72c7
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/results/clientpositive/offset_limit_ppd_optimizer.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/offset_limit_ppd_optimizer.q.out
new file mode 100644
index 0000000..facb26c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit_ppd_optimizer.q.out
@@ -0,0 +1,1377 @@
+PREHOOK: query: explain
+select key,value from src order by key limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key,value from src order by key limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key,value from src order by key limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value from src order by key limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+PREHOOK: query: explain
+select key,value from src order by key desc limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key,value from src order by key desc limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: -
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key,value from src order by key desc limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value from src order by key desc limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+9 val_9
+87 val_87
+86 val_86
+85 val_85
+84 val_84
+84 val_84
+83 val_83
+83 val_83
+82 val_82
+80 val_80
+8 val_8
+78 val_78
+77 val_77
+76 val_76
+76 val_76
+74 val_74
+72 val_72
+72 val_72
+70 val_70
+PREHOOK: query: explain
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum((key + 1))
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, sum(key + 1) as sum from src group by value order by value limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(key + 1) as sum from src group by value order by value limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_116 117.0
+val_118 238.0
+val_119 360.0
+val_12 26.0
+val_120 242.0
+val_125 252.0
+val_126 127.0
+val_128 387.0
+val_129 260.0
+val_131 132.0
+val_133 134.0
+val_134 270.0
+val_136 137.0
+val_137 276.0
+val_138 556.0
+val_143 144.0
+val_145 146.0
+val_146 294.0
+val_149 300.0
+val_15 32.0
+PREHOOK: query: -- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg((key + 1))
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value,avg(key + 1) from src group by value order by value limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value,avg(key + 1) from src group by value order by value limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_116 117.0
+val_118 119.0
+val_119 120.0
+val_12 13.0
+val_120 121.0
+val_125 126.0
+val_126 127.0
+val_128 129.0
+val_129 130.0
+val_131 132.0
+val_133 134.0
+val_134 135.0
+val_136 137.0
+val_137 138.0
+val_138 139.0
+val_143 144.0
+val_145 146.0
+val_146 147.0
+val_149 150.0
+val_15 16.0
+PREHOOK: query: -- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cdouble (type: double)
+ outputColumnNames: cdouble
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-16309.0
+-16307.0
+-16306.0
+-16305.0
+-16300.0
+-16296.0
+-16280.0
+-16277.0
+-16274.0
+-16269.0
+-16243.0
+-16236.0
+-16227.0
+-16225.0
+-16221.0
+-16218.0
+-16217.0
+-16211.0
+-16208.0
+-16207.0
+PREHOOK: query: explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double)
+ outputColumnNames: ctinyint, cdouble
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT cdouble)
+ keys: ctinyint (type: tinyint), cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-55 29
+-54 26
+-53 22
+-52 33
+-51 21
+-50 30
+-49 26
+-48 29
+-47 22
+-46 24
+-45 24
+-44 24
+-43 30
+-42 17
+-41 24
+-40 26
+-39 22
+-38 31
+-37 20
+-36 26
+PREHOOK: query: explain
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double)
+ outputColumnNames: ctinyint, cdouble
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: ctinyint (type: tinyint), cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: tinyint)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-55 29
+-54 26
+-53 22
+-52 33
+-51 21
+-50 30
+-49 26
+-48 29
+-47 22
+-46 24
+-45 24
+-44 24
+-43 30
+-42 17
+-41 24
+-40 26
+-39 22
+-38 31
+-37 20
+-36 26
+PREHOOK: query: -- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
+ outputColumnNames: ctinyint, cstring1, cstring2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2)
+ keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-55 3 21
+-54 3 21
+-53 3 17
+-52 3 21
+-51 1012 1045
+-50 3 25
+-49 3 24
+-48 3 27
+-47 3 23
+-46 3 19
+-45 3 24
+-44 3 31
+-43 3 26
+-42 3 22
+-41 3 29
+-40 3 25
+-39 3 30
+-38 3 19
+-37 3 27
+-36 3 18
+PREHOOK: query: -- limit zero
+explain
+select key,value from src order by key limit 0,0
+PREHOOK: type: QUERY
+POSTHOOK: query: -- limit zero
+explain
+select key,value from src order by key limit 0,0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 0
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key,value from src order by key limit 0,0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value from src order by key limit 0,0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+PREHOOK: query: -- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: value, key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(key)
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: double)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, sum(key) as sum from src group by value order by sum limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(key) as sum from src group by value order by sum limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_20 20.0
+val_12 24.0
+val_27 27.0
+val_28 28.0
+val_15 30.0
+val_30 30.0
+val_33 33.0
+val_34 34.0
+val_18 36.0
+val_41 41.0
+val_43 43.0
+val_44 44.0
+val_47 47.0
+val_24 48.0
+val_26 52.0
+val_53 53.0
+val_54 54.0
+val_57 57.0
+val_64 64.0
+val_65 65.0
+PREHOOK: query: -- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 10,20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 10,20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: key (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, sum(key) as sum from src group by value order by value limit 10,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(key) as sum from src group by value order by value limit 10,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_116 116.0
+val_118 236.0
+val_119 357.0
+val_12 24.0
+val_120 240.0
+val_125 250.0
+val_126 126.0
+val_128 384.0
+val_129 258.0
+val_131 131.0
+val_133 133.0
+val_134 268.0
+val_136 136.0
+val_137 274.0
+val_138 552.0
+val_143 143.0
+val_145 145.0
+val_146 292.0
+val_149 298.0
+val_15 30.0
+PREHOOK: query: -- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70
+PREHOOK: type: QUERY
+POSTHOOK: query: -- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 2.0E-5
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 70
+ Offset of rows: 30
+ Statistics: Num rows: 70 Data size: 700 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 70 Data size: 700 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 70
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key,value,value,value,value,value,value,value,value from src order by key limit 30,70
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key,value,value,value,value,value,value,value,value from src order by key limit 30,70
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+128 val_128 val_128 val_128 val_128 val_128 val_128 val_128 val_128
+128 val_128 val_128 val_128 val_128 val_128 val_128 val_128 val_128
+129 val_129 val_129 val_129 val_129 val_129 val_129 val_129 val_129
+129 val_129 val_129 val_129 val_129 val_129 val_129 val_129 val_129
+131 val_131 val_131 val_131 val_131 val_131 val_131 val_131 val_131
+133 val_133 val_133 val_133 val_133 val_133 val_133 val_133 val_133
+134 val_134 val_134 val_134 val_134 val_134 val_134 val_134 val_134
+134 val_134 val_134 val_134 val_134 val_134 val_134 val_134 val_134
+136 val_136 val_136 val_136 val_136 val_136 val_136 val_136 val_136
+137 val_137 val_137 val_137 val_137 val_137 val_137 val_137 val_137
+137 val_137 val_137 val_137 val_137 val_137 val_137 val_137 val_137
+138 val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+138 val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+138 val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+138 val_138 val_138 val_138 val_138 val_138 val_138 val_138 val_138
+143 val_143 val_143 val_143 val_143 val_143 val_143 val_143 val_143
+145 val_145 val_145 val_145 val_145 val_145 val_145 val_145 val_145
+146 val_146 val_146 val_146 val_146 val_146 val_146 val_146 val_146
+146 val_146 val_146 val_146 val_146 val_146 val_146 val_146 val_146
+149 val_149 val_149 val_149 val_149 val_149 val_149 val_149 val_149
+149 val_149 val_149 val_149 val_149 val_149 val_149 val_149 val_149
+15 val_15 val_15 val_15 val_15 val_15 val_15 val_15 val_15
+15 val_15 val_15 val_15 val_15 val_15 val_15 val_15 val_15
+150 val_150 val_150 val_150 val_150 val_150 val_150 val_150 val_150
+152 val_152 val_152 val_152 val_152 val_152 val_152 val_152 val_152
+152 val_152 val_152 val_152 val_152 val_152 val_152 val_152 val_152
+153 val_153 val_153 val_153 val_153 val_153 val_153 val_153 val_153
+155 val_155 val_155 val_155 val_155 val_155 val_155 val_155 val_155
+156 val_156 val_156 val_156 val_156 val_156 val_156 val_156 val_156
+157 val_157 val_157 val_157 val_157 val_157 val_157 val_157 val_157
+158 val_158 val_158 val_158 val_158 val_158 val_158 val_158 val_158
+160 val_160 val_160 val_160 val_160 val_160 val_160 val_160 val_160
+162 val_162 val_162 val_162 val_162 val_162 val_162 val_162 val_162
+163 val_163 val_163 val_163 val_163 val_163 val_163 val_163 val_163
+164 val_164 val_164 val_164 val_164 val_164 val_164 val_164 val_164
+164 val_164 val_164 val_164 val_164 val_164 val_164 val_164 val_164
+165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165
+165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165
+166 val_166 val_166 val_166 val_166 val_166 val_166 val_166 val_166
+167 val_167 val_167 val_167 val_167 val_167 val_167 val_167 val_167
+167 val_167 val_167 val_167 val_167 val_167 val_167 val_167 val_167
+167 val_167 val_167 val_167 val_167 val_167 val_167 val_167 val_167
+168 val_168 val_168 val_168 val_168 val_168 val_168 val_168 val_168
+169 val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+169 val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+169 val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+169 val_169 val_169 val_169 val_169 val_169 val_169 val_169 val_169
+17 val_17 val_17 val_17 val_17 val_17 val_17 val_17 val_17
+170 val_170 val_170 val_170 val_170 val_170 val_170 val_170 val_170
+172 val_172 val_172 val_172 val_172 val_172 val_172 val_172 val_172
+172 val_172 val_172 val_172 val_172 val_172 val_172 val_172 val_172
+174 val_174 val_174 val_174 val_174 val_174 val_174 val_174 val_174
+174 val_174 val_174 val_174 val_174 val_174 val_174 val_174 val_174
+175 val_175 val_175 val_175 val_175 val_175 val_175 val_175 val_175
+175 val_175 val_175 val_175 val_175 val_175 val_175 val_175 val_175
+176 val_176 val_176 val_176 val_176 val_176 val_176 val_176 val_176
+176 val_176 val_176 val_176 val_176 val_176 val_176 val_176 val_176
+177 val_177 val_177 val_177 val_177 val_177 val_177 val_177 val_177
+178 val_178 val_178 val_178 val_178 val_178 val_178 val_178 val_178
+179 val_179 val_179 val_179 val_179 val_179 val_179 val_179 val_179
+179 val_179 val_179 val_179 val_179 val_179 val_179 val_179 val_179
+18 val_18 val_18 val_18 val_18 val_18 val_18 val_18 val_18
+18 val_18 val_18 val_18 val_18 val_18 val_18 val_18 val_18
+180 val_180 val_180 val_180 val_180 val_180 val_180 val_180 val_180
+181 val_181 val_181 val_181 val_181 val_181 val_181 val_181 val_181
+183 val_183 val_183 val_183 val_183 val_183 val_183 val_183 val_183
+186 val_186 val_186 val_186 val_186 val_186 val_186 val_186 val_186
+187 val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187
+187 val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187
+187 val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187
+PREHOOK: query: -- flush for group-by
+explain
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70
+PREHOOK: type: QUERY
+POSTHOOK: query: -- flush for group-by
+explain
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: concat(key, value, value, value, value, value, value, value, value, value) (type: string)
+ sort order: +
+ Map-reduce partition columns: concat(key, value, value, value, value, value, value, value, value, value) (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 2.0E-5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 70
+ Offset of rows: 30
+ Statistics: Num rows: 70 Data size: 700 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 70 Data size: 700 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 70
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+66.0
+69.0
+74.0
+74.0
+77.0
+78.0
+80.0
+82.0
+84.0
+85.0
+86.0
+87.0
+92.0
+96.0
+102.0
+105.0
+105.0
+111.0
+114.0
+116.0
+116.0
+126.0
+131.0
+133.0
+134.0
+136.0
+143.0
+144.0
+145.0
+150.0
+152.0
+153.0
+155.0
+156.0
+157.0
+158.0
+160.0
+162.0
+163.0
+166.0
+166.0
+168.0
+168.0
+170.0
+177.0
+178.0
+180.0
+181.0
+183.0
+186.0
+189.0
+190.0
+190.0
+192.0
+194.0
+194.0
+196.0
+196.0
+200.0
+201.0
+202.0
+206.0
+208.0
+210.0
+214.0
+218.0
+222.0
+226.0
+226.0
+228.0
+PREHOOK: query: -- subqueries
+explain
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5
+PREHOOK: type: QUERY
+POSTHOOK: query: -- subqueries
+explain
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 2.0E-5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(1)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 11 Data size: 110 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 5
+ Offset of rows: 3
+ Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 2.0E-5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(1)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 2.0E-5
+ value expressions: _col0 (type: string), _col1 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key order by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key order by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+137 2 137 2
+138 4 138 4
+143 1 143 1
+145 1 145 1
+146 2 146 2
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/results/clientpositive/vectorization_offset_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_offset_limit.q.out b/ql/src/test/results/clientpositive/vectorization_offset_limit.q.out
new file mode 100644
index 0000000..d5aeabd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorization_offset_limit.q.out
@@ -0,0 +1,118 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint), cdouble (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 2
+ Offset of rows: 3
+ Statistics: Num rows: 2 Data size: 430 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 430 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 2
+ Processor Tree:
+ ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1887561756 10361.0
+-1887561756 -8881.0
+PREHOOK: query: explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: smallint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 3
+ Offset of rows: 10
+ Statistics: Num rows: 3 Data size: 645 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 645 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64 -7196.0 -7196
+-64 -6907.0 -6907
+-64 -4803.0 -4803
[3/3] hive git commit: HIVE-11531: Add mysql-style LIMIT support to
Hive, or improve ROW_NUMBER performance-wise (Hui Zheng,
reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
Posted by jc...@apache.org.
HIVE-11531: Add mysql-style LIMIT support to Hive, or improve ROW_NUMBER performance-wise (Hui Zheng, reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e7abf72c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e7abf72c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e7abf72c
Branch: refs/heads/master
Commit: e7abf72c7009e312296985ebc24198e23ab632bd
Parents: 57f39a9
Author: Hui Zheng <hu...@yahoo-corp.jp>
Authored: Thu Dec 10 10:16:16 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Dec 10 10:16:16 2015 +0100
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/LimitOperator.java | 6 +-
.../ql/exec/vector/VectorLimitOperator.java | 26 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 3 +-
.../hive/ql/optimizer/GlobalLimitOptimizer.java | 22 +-
.../ql/optimizer/LimitPushdownOptimizer.java | 5 +-
.../calcite/reloperators/HiveSortLimit.java | 4 +
.../calcite/translator/ASTBuilder.java | 5 +-
.../calcite/translator/ASTConverter.java | 11 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 15 +-
.../hadoop/hive/ql/parse/GlobalLimitCtx.java | 10 +-
.../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 +
.../apache/hadoop/hive/ql/parse/HiveParser.g | 5 +-
.../hadoop/hive/ql/parse/IdentifiersParser.g | 2 +-
.../hadoop/hive/ql/parse/MapReduceCompiler.java | 3 +-
.../hadoop/hive/ql/parse/QBParseInfo.java | 20 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 27 +-
.../apache/hadoop/hive/ql/plan/LimitDesc.java | 19 +
.../test/queries/clientpositive/offset_limit.q | 25 +
.../offset_limit_global_optimizer.q | 45 +
.../clientpositive/offset_limit_ppd_optimizer.q | 80 +
.../clientpositive/vectorization_offset_limit.q | 10 +
.../results/clientpositive/offset_limit.q.out | 257 ++
.../offset_limit_global_optimizer.q.out | 3390 ++++++++++++++++++
.../offset_limit_ppd_optimizer.q.out | 1377 +++++++
.../vectorization_offset_limit.q.out | 118 +
25 files changed, 5442 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
index 8fe96be..fc85bea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
@@ -34,6 +34,7 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
private static final long serialVersionUID = 1L;
protected transient int limit;
+ protected transient int offset;
protected transient int leastRow;
protected transient int currCount;
protected transient boolean isMap;
@@ -43,15 +44,18 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
super.initializeOp(hconf);
limit = conf.getLimit();
leastRow = conf.getLeastRows();
+ offset = (conf.getOffset() == null) ? 0 : conf.getOffset();
currCount = 0;
isMap = hconf.getBoolean("mapred.task.is.map", true);
}
@Override
public void process(Object row, int tag) throws HiveException {
- if (currCount < limit) {
+ if (offset <= currCount && currCount < (offset + limit)) {
forward(row, inputObjInspectors[tag]);
currCount++;
+ } else if (offset > currCount) {
+ currCount++;
} else {
setDone(true);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
index 2f4e46b..4cb91d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
@@ -42,12 +42,30 @@ public class VectorLimitOperator extends LimitOperator {
public void process(Object row, int tag) throws HiveException {
VectorizedRowBatch batch = (VectorizedRowBatch) row;
- if (currCount < limit) {
- batch.size = Math.min(batch.size, limit - currCount);
- forward(row, inputObjInspectors[tag]);
+ if (currCount + batch.size < offset) {
currCount += batch.size;
- } else {
+ } else if (currCount >= offset + limit) {
setDone(true);
+ } else {
+ int skipSize = 0;
+ if (currCount < offset) {
+ skipSize = offset - currCount;
+ }
+ //skip skipSize rows of batch
+ batch.size = Math.min(batch.size, offset + limit - currCount);
+ if (batch.selectedInUse == false) {
+ batch.selectedInUse = true;
+ batch.selected = new int[batch.size];
+ for (int i = 0; i < batch.size - skipSize; i++) {
+ batch.selected[i] = skipSize + i;
+ }
+ } else {
+ for (int i = 0; i < batch.size - skipSize; i++) {
+ batch.selected[i] = batch.selected[skipSize + i];
+ }
+ }
+ forward(row, inputObjInspectors[tag]);
+ currCount += batch.size;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index a1c9651..9cbd496 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -543,7 +543,8 @@ public final class GenMapRedUtils {
} else {
long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
- sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
+ sizeNeeded = (parseCtx.getGlobalLimitCtx().getGlobalOffset()
+ + parseCtx.getGlobalLimitCtx().getGlobalLimit()) * sizePerRow;
// for the optimization that reduce number of input file, we limit number
// of files allowed. If more than specific number of files have to be
// selected, we skip this optimization. Since having too many files as
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
index 30976af..5f03f5e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
@@ -91,16 +92,19 @@ public class GlobalLimitOptimizer implements Transform {
// SELECT * FROM (SELECT col1 as col2 (SELECT * FROM ...) t1 LIMIT ...) t2);
//
TableScanOperator ts = topOps.values().iterator().next();
- Integer tempGlobalLimit = checkQbpForGlobalLimit(ts);
+ LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
// query qualify for the optimization
- if (tempGlobalLimit != null && tempGlobalLimit != 0) {
+ if (tempGlobalLimit != null) {
+ LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
Table tab = ts.getConf().getTableMetadata();
Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
if (!tab.isPartitioned()) {
if (filterOps.size() == 0) {
- globalLimitCtx.enableOpt(tempGlobalLimit);
+ Integer tempOffset = tempGlobalLimitDesc.getOffset();
+ globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(),
+ (tempOffset == null) ? 0 : tempOffset);
}
} else {
// check if the pruner only contains partition columns
@@ -112,11 +116,15 @@ public class GlobalLimitOptimizer implements Transform {
// If there is any unknown partition, create a map-reduce job for
// the filter to prune correctly
if (!partsList.hasUnknownPartitions()) {
- globalLimitCtx.enableOpt(tempGlobalLimit);
+ Integer tempOffset = tempGlobalLimitDesc.getOffset();
+ globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(),
+ (tempOffset == null) ? 0 : tempOffset);
}
}
}
if (globalLimitCtx.isEnable()) {
+ LOG.info("Qualify the optimize that reduces input size for 'offset' for offset "
+ + globalLimitCtx.getGlobalOffset());
LOG.info("Qualify the optimize that reduces input size for 'limit' for limit "
+ globalLimitCtx.getGlobalLimit());
}
@@ -141,7 +149,7 @@ public class GlobalLimitOptimizer implements Transform {
* if there is no limit, return 0
* otherwise, return null
*/
- private static Integer checkQbpForGlobalLimit(TableScanOperator ts) {
+ private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
Set<Class<? extends Operator<?>>> searchedClasses =
new ImmutableSet.Builder<Class<? extends Operator<?>>>()
.add(ReduceSinkOperator.class)
@@ -183,10 +191,10 @@ public class GlobalLimitOptimizer implements Transform {
// Otherwise, return null
Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
if (limitOps.size() == 1) {
- return ((LimitOperator) limitOps.iterator().next()).getConf().getLimit();
+ return (LimitOperator) limitOps.iterator().next();
}
else if (limitOps.size() == 0) {
- return 0;
+ return null;
}
return null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
index e850550..8cae0d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
/**
* Make RS calculate top-K selection for limit clause.
@@ -130,7 +131,9 @@ public class LimitPushdownOptimizer implements Transform {
return false;
}
LimitOperator limit = (LimitOperator) nd;
- rs.getConf().setTopN(limit.getConf().getLimit());
+ LimitDesc limitDesc = limit.getConf();
+ Integer offset = limitDesc.getOffset();
+ rs.getConf().setTopN(limitDesc.getLimit() + ((offset == null) ? 0 : offset));
rs.getConf().setTopNMemoryUsage(((LimitPushdownContext) procCtx).threshold);
if (rs.getNumChild() == 1 && rs.getChildren().get(0) instanceof GroupByOperator) {
rs.getConf().setMapGroupBy(true);
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
index 3077177..6a47443 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
@@ -86,6 +86,10 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
return fetch;
}
+ public RexNode getOffsetExpr() {
+ return offset;
+ }
+
public void setInputRefToCallMap(ImmutableMap<Integer, RexNode> refToCall) {
this.mapOfInputRefToRexCall = refToCall;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 1f5d919..425514d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -128,9 +128,10 @@ class ASTBuilder {
return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
}
- static ASTNode limit(Object value) {
+ static ASTNode limit(Object offset, Object limit) {
return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
- .add(HiveParser.Number, value.toString()).node();
+ .add(HiveParser.Number, offset.toString())
+ .add(HiveParser.Number, limit.toString()).node();
}
static ASTNode selectExpr(ASTNode expr, String alias) {
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index d026e58..3f2267d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -251,10 +251,13 @@ public class ASTConverter {
hiveAST.order = orderAst;
}
- RexNode limitExpr = hiveSortLimit.getFetchExpr();
- if (limitExpr != null) {
- Object val = ((RexLiteral) limitExpr).getValue2();
- hiveAST.limit = ASTBuilder.limit(val);
+ RexNode offsetExpr = hiveSortLimit.getOffsetExpr();
+ RexNode fetchExpr = hiveSortLimit.getFetchExpr();
+ if (fetchExpr != null) {
+ Object offset = (offsetExpr == null) ?
+ new Integer(0) : ((RexLiteral) offsetExpr).getValue2();
+ Object fetch = ((RexLiteral) fetchExpr).getValue2();
+ hiveAST.limit = ASTBuilder.limit(offset, fetch);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c005b1a..51c0eaf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -26,6 +26,7 @@ import java.util.BitSet;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.AbstractMap.SimpleEntry;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -2361,13 +2362,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
HiveRelNode sortRel = null;
QBParseInfo qbp = getQBParseInfo(qb);
- Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
-
- if (limit != null) {
- RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
+ SimpleEntry<Integer,Integer> entry =
+ qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
+ Integer offset = (entry == null) ? 0 : entry.getKey();
+ Integer fetch = (entry == null) ? null : entry.getValue();
+
+ if (fetch != null) {
+ RexNode offsetRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(offset));
+ RexNode fetchRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(fetch));
RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
RelCollation canonizedCollation = traitSet.canonize(RelCollations.EMPTY);
- sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
+ sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, offsetRN, fetchRN);
RowResolver outputRR = new RowResolver();
if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) {
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
index 6cd636c..c37f9ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
@@ -27,6 +27,7 @@ public class GlobalLimitCtx {
private boolean enable;
private int globalLimit;
+ private int globalOffset;
private boolean hasTransformOrUDTF;
private LimitDesc lastReduceLimitDesc;
@@ -38,6 +39,10 @@ public class GlobalLimitCtx {
return globalLimit;
}
+ public int getGlobalOffset() {
+ return globalOffset;
+ }
+
public boolean ifHasTransformOrUDTF() {
return hasTransformOrUDTF;
}
@@ -58,20 +63,23 @@ public class GlobalLimitCtx {
return enable;
}
- public void enableOpt(int globalLimit) {
+ public void enableOpt(int globalLimit, int globalOffset) {
this.enable = true;
this.globalLimit = globalLimit;
+ this.globalOffset = globalOffset;
}
public void disableOpt() {
this.enable = false;
this.globalLimit = -1;
+ this.globalOffset = 0;
this.lastReduceLimitDesc = null;
}
public void reset() {
enable = false;
globalLimit = -1;
+ globalOffset = 0;
hasTransformOrUDTF = false;
lastReduceLimitDesc = null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 395152f..1c72b1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -185,6 +185,7 @@ KW_DEFERRED: 'DEFERRED';
KW_SERDEPROPERTIES: 'SERDEPROPERTIES';
KW_DBPROPERTIES: 'DBPROPERTIES';
KW_LIMIT: 'LIMIT';
+KW_OFFSET: 'OFFSET';
KW_SET: 'SET';
KW_UNSET: 'UNSET';
KW_TBLPROPERTIES: 'TBLPROPERTIES';
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index f6ea2a3..d5051ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -249,6 +249,7 @@ TOK_INDEXPROPERTIES;
TOK_INDEXPROPLIST;
TOK_TABTYPE;
TOK_LIMIT;
+TOK_OFFSET;
TOK_TABLEPROPERTY;
TOK_IFEXISTS;
TOK_IFNOTEXISTS;
@@ -497,6 +498,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
xlateMap.put("KW_WITH", "WITH");
xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES");
xlateMap.put("KW_LIMIT", "LIMIT");
+ xlateMap.put("KW_OFFSET", "OFFSET");
xlateMap.put("KW_SET", "SET");
xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$");
@@ -2362,7 +2364,8 @@ limitClause
@init { pushMsg("limit clause", state); }
@after { popMsg(state); }
:
- KW_LIMIT num=Number -> ^(TOK_LIMIT $num)
+ KW_LIMIT ((offset=Number COMMA)? num=Number) -> ^(TOK_LIMIT ($offset)? $num)
+ | KW_LIMIT num=Number KW_OFFSET offset=Number -> ^(TOK_LIMIT ($offset)? $num)
;
//DELETE FROM <tableName> WHERE ...;
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index bac0d22..15ca754 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -653,7 +653,7 @@ nonReserved
| KW_ENABLE | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT
| KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE
| KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR
- | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
+ | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
| KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE
| KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY
| KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
index d41253f..5b08ed2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java
@@ -215,7 +215,8 @@ public class MapReduceCompiler extends TaskCompiler {
//
long sizePerRow = HiveConf.getLongVar(conf,
HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
- estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
+ estimatedInput = (globalLimitCtx.getGlobalOffset() +
+ globalLimitCtx.getGlobalLimit()) * sizePerRow;
long minSplitSize = HiveConf.getLongVar(conf,
HiveConf.ConfVars.MAPREDMINSPLITSIZE);
long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index 186c2a8..3a226e7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -26,6 +26,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.AbstractMap.SimpleEntry;
import org.antlr.runtime.tree.Tree;
import org.slf4j.Logger;
@@ -99,7 +100,10 @@ public class QBParseInfo {
/* Order by clause */
private final HashMap<String, ASTNode> destToOrderby;
- private final HashMap<String, Integer> destToLimit;
+ // Use SimpleEntry to save the offset and rowcount of limit clause
+ // KEY of SimpleEntry: offset
+ // VALUE of SimpleEntry: rowcount
+ private final HashMap<String, SimpleEntry<Integer, Integer>> destToLimit;
private int outerQueryLimit;
// used by GroupBy
@@ -128,7 +132,7 @@ public class QBParseInfo {
destToDistributeby = new HashMap<String, ASTNode>();
destToSortby = new HashMap<String, ASTNode>();
destToOrderby = new HashMap<String, ASTNode>();
- destToLimit = new HashMap<String, Integer>();
+ destToLimit = new HashMap<String, SimpleEntry<Integer, Integer>>();
insertIntoTables = new HashSet<String>();
destRollups = new HashSet<String>();
destCubes = new HashSet<String>();
@@ -440,12 +444,16 @@ public class QBParseInfo {
exprToColumnAlias.put(expr, alias);
}
- public void setDestLimit(String dest, Integer limit) {
- destToLimit.put(dest, limit);
+ public void setDestLimit(String dest, Integer offset, Integer limit) {
+ destToLimit.put(dest, new SimpleEntry<>(offset, limit));
}
public Integer getDestLimit(String dest) {
- return destToLimit.get(dest);
+ return destToLimit.get(dest) == null ? null : destToLimit.get(dest).getValue();
+ }
+
+ public Integer getDestLimitOffset(String dest) {
+ return destToLimit.get(dest) == null ? 0 : destToLimit.get(dest).getKey();
}
/**
@@ -566,7 +574,7 @@ public class QBParseInfo {
return tableSpecs.get(tName.next());
}
- public HashMap<String, Integer> getDestToLimit() {
+ public HashMap<String, SimpleEntry<Integer,Integer>> getDestToLimit() {
return destToLimit;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 6ec985d..945a6d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1325,7 +1325,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
break;
case HiveParser.TOK_LIMIT:
- qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()));
+ if (ast.getChildCount() == 2) {
+ qbp.setDestLimit(ctx_1.dest,
+ new Integer(ast.getChild(0).getText()),
+ new Integer(ast.getChild(1).getText()));
+ } else {
+ qbp.setDestLimit(ctx_1.dest, new Integer(0),
+ new Integer(ast.getChild(0).getText()));
+ }
break;
case HiveParser.TOK_ANALYZE:
@@ -6798,7 +6805,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
@SuppressWarnings("nls")
- private Operator genLimitPlan(String dest, QB qb, Operator input, int limit)
+ private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit)
throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map
@@ -6809,7 +6816,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
- LimitDesc limitDesc = new LimitDesc(limit);
+ LimitDesc limitDesc = new LimitDesc(offset, limit);
globalLimitCtx.setLastReduceLimitDesc(limitDesc);
Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(
@@ -6919,14 +6926,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
@SuppressWarnings("nls")
private Operator genLimitMapRedPlan(String dest, QB qb, Operator input,
- int limit, boolean extraMRStep) throws SemanticException {
+ int offset, int limit, boolean extraMRStep) throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map
// job to do the same to avoid the cost of sorting in the map-reduce phase.
// A better approach would be to
// write into a local file and then have a map-only job.
// Add the limit operator to get the value fields
- Operator curr = genLimitPlan(dest, qb, input, limit);
+ Operator curr = genLimitPlan(dest, qb, input, offset, limit);
// the client requested that an extra map-reduce step be performed
if (!extraMRStep) {
@@ -6935,7 +6942,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// Create a reduceSink operator followed by another limit
curr = genReduceSinkPlan(dest, qb, curr, 1, false);
- return genLimitPlan(dest, qb, curr, limit);
+ return genLimitPlan(dest, qb, curr, offset, limit);
}
private ArrayList<ExprNodeDesc> getPartitionColsFromBucketCols(String dest, QB qb, Table tab,
@@ -8868,6 +8875,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
curr = genSelectPlan(dest, qb, curr, gbySource);
Integer limit = qbp.getDestLimit(dest);
+ Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : qbp.getDestLimitOffset(dest);
// Expressions are not supported currently without a alias.
@@ -8912,7 +8920,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (limit != null) {
// In case of order by, only 1 reducer is used, so no need of
// another shuffle
- curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), !hasOrderBy);
+ curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
+ limit.intValue(), !hasOrderBy);
}
} else {
// exact limit can be taken care of by the fetch operator
@@ -8925,8 +8934,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
extraMRStep = false;
}
- curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(),
- extraMRStep);
+ curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
+ limit.intValue(), extraMRStep);
qb.getParseInfo().setOuterQueryLimit(limit.intValue());
}
if (!SessionState.get().getHiveOperation().equals(HiveOperation.CREATEVIEW)) {
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
index f88bf63..8448a41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
@Explain(displayName = "Limit", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class LimitDesc extends AbstractOperatorDesc {
private static final long serialVersionUID = 1L;
+ private int offset = 0;
private int limit;
private int leastRows = -1;
@@ -37,6 +38,24 @@ public class LimitDesc extends AbstractOperatorDesc {
this.limit = limit;
}
+ public LimitDesc(final int offset, final int limit) {
+ this.offset = offset;
+ this.limit = limit;
+ }
+
+ /**
+ * Returns the row offset, or null when the offset is 0 so that it is not printed.
+ * The return type is Integer rather than int so that null can be returned.
+ */
+ @Explain(displayName = "Offset of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+ public Integer getOffset() {
+ return (offset == 0) ? null : new Integer(offset);
+ }
+
+ public void setOffset(Integer offset) {
+ this.offset = (offset == null) ? 0 : offset; // null means "no offset" (what getOffset() returns for 0); avoids NPE on unboxing
+ }
+
@Explain(displayName = "Number of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public int getLimit() {
return limit;
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/queries/clientpositive/offset_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit.q b/ql/src/test/queries/clientpositive/offset_limit.q
new file mode 100644
index 0000000..80d559a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit.q
@@ -0,0 +1,25 @@
+EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
+set hive.cbo.enable=false;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q b/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
new file mode 100644
index 0000000..5ddb9a6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
@@ -0,0 +1,45 @@
+set hive.limit.optimize.enable=true;
+set hive.limit.row.max.size=12;
+set hive.mapred.mode=nonstrict;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+set hive.cbo.enable=false;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q b/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
new file mode 100644
index 0000000..2895203
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
@@ -0,0 +1,80 @@
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+explain
+select key,value from src order by key limit 10,20;
+select key,value from src order by key limit 10,20;
+
+explain
+select key,value from src order by key desc limit 10,20;
+select key,value from src order by key desc limit 10,20;
+
+explain
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20;
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20;
+
+-- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 10,20;
+select value,avg(key + 1) from src group by value order by value limit 10,20;
+
+-- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20;
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20;
+
+explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+
+explain
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20;
+
+-- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+
+-- limit zero
+explain
+select key,value from src order by key limit 0,0;
+select key,value from src order by key limit 0,0;
+
+-- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 10,20;
+select value, sum(key) as sum from src group by value order by sum limit 10,20;
+
+set hive.map.aggr=false;
+-- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 10,20;
+select value, sum(key) as sum from src group by value order by value limit 10,20;
+
+set hive.limit.pushdown.memory.usage=0.00002f;
+
+-- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70;
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70;
+
+-- flush for group-by
+explain
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70;
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70;
+
+-- subqueries
+explain
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5;
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key order by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_offset_limit.q b/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
new file mode 100644
index 0000000..3d01154
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
@@ -0,0 +1,10 @@
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.mapred.mode=nonstrict;
+
+explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+
+explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/results/clientpositive/offset_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/offset_limit.q.out b/ql/src/test/results/clientpositive/offset_limit.q.out
new file mode 100644
index 0000000..2092c1d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit.q.out
@@ -0,0 +1,257 @@
+PREHOOK: query: EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(substr(value, 5))
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: double)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Offset of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
[2/3] hive git commit: HIVE-11531: Add mysql-style LIMIT support to
Hive, or improve ROW_NUMBER performance-wise (Hui Zheng,
reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
Posted by jc...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/e7abf72c/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out b/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
new file mode 100644
index 0000000..999c5e6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out
@@ -0,0 +1,3390 @@
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 400
+ 10
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Offset of rows: 400
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: kv1.txt
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11/kv1.txt [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+70 70 2008-04-08 11
+163 163 2008-04-08 11
+438 438 2008-04-08 11
+119 119 2008-04-08 11
+414 414 2008-04-08 11
+200 200 2008-04-08 11
+491 491 2008-04-08 11
+237 237 2008-04-08 11
+439 439 2008-04-08 11
+360 360 2008-04-08 11
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 490
+ 10
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Offset of rows: 490
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [srcpart]
+ /srcpart/ds=2008-04-08/hr=12 [srcpart]
+ /srcpart/ds=2008-04-09/hr=11 [srcpart]
+ /srcpart/ds=2008-04-09/hr=12 [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+414 414 2008-04-08 11
+477 477 2008-04-08 11
+222 222 2008-04-08 11
+126 126 2008-04-08 11
+90 90 2008-04-08 11
+169 169 2008-04-08 11
+403 403 2008-04-08 11
+400 400 2008-04-08 11
+200 200 2008-04-08 11
+97 97 2008-04-08 11
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 490
+ 20
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 490
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [srcpart]
+ /srcpart/ds=2008-04-08/hr=12 [srcpart]
+ /srcpart/ds=2008-04-09/hr=11 [srcpart]
+ /srcpart/ds=2008-04-09/hr=12 [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+414 414 2008-04-08 11
+477 477 2008-04-08 11
+222 222 2008-04-08 11
+126 126 2008-04-08 11
+90 90 2008-04-08 11
+169 169 2008-04-08 11
+403 403 2008-04-08 11
+400 400 2008-04-08 11
+200 200 2008-04-08 11
+97 97 2008-04-08 11
+238 238 2008-04-08 12
+86 86 2008-04-08 12
+311 311 2008-04-08 12
+27 27 2008-04-08 12
+165 165 2008-04-08 12
+409 409 2008-04-08 12
+255 255 2008-04-08 12
+278 278 2008-04-08 12
+98 98 2008-04-08 12
+484 484 2008-04-08 12
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 490
+ 600
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 600
+ Offset of rows: 490
+ Statistics: Num rows: 600 Data size: 6000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 600 Data size: 6000 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [srcpart]
+ /srcpart/ds=2008-04-08/hr=12 [srcpart]
+ /srcpart/ds=2008-04-09/hr=11 [srcpart]
+ /srcpart/ds=2008-04-09/hr=12 [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 600
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+414 414 2008-04-08 11
+477 477 2008-04-08 11
+222 222 2008-04-08 11
+126 126 2008-04-08 11
+90 90 2008-04-08 11
+169 169 2008-04-08 11
+403 403 2008-04-08 11
+400 400 2008-04-08 11
+200 200 2008-04-08 11
+97 97 2008-04-08 11
+238 238 2008-04-08 12
+86 86 2008-04-08 12
+311 311 2008-04-08 12
+27 27 2008-04-08 12
+165 165 2008-04-08 12
+409 409 2008-04-08 12
+255 255 2008-04-08 12
+278 278 2008-04-08 12
+98 98 2008-04-08 12
+484 484 2008-04-08 12
+265 265 2008-04-08 12
+193 193 2008-04-08 12
+401 401 2008-04-08 12
+150 150 2008-04-08 12
+273 273 2008-04-08 12
+224 224 2008-04-08 12
+369 369 2008-04-08 12
+66 66 2008-04-08 12
+128 128 2008-04-08 12
+213 213 2008-04-08 12
+146 146 2008-04-08 12
+406 406 2008-04-08 12
+429 429 2008-04-08 12
+374 374 2008-04-08 12
+152 152 2008-04-08 12
+469 469 2008-04-08 12
+145 145 2008-04-08 12
+495 495 2008-04-08 12
+37 37 2008-04-08 12
+327 327 2008-04-08 12
+281 281 2008-04-08 12
+277 277 2008-04-08 12
+209 209 2008-04-08 12
+15 15 2008-04-08 12
+82 82 2008-04-08 12
+403 403 2008-04-08 12
+166 166 2008-04-08 12
+417 417 2008-04-08 12
+430 430 2008-04-08 12
+252 252 2008-04-08 12
+292 292 2008-04-08 12
+219 219 2008-04-08 12
+287 287 2008-04-08 12
+153 153 2008-04-08 12
+193 193 2008-04-08 12
+338 338 2008-04-08 12
+446 446 2008-04-08 12
+459 459 2008-04-08 12
+394 394 2008-04-08 12
+237 237 2008-04-08 12
+482 482 2008-04-08 12
+174 174 2008-04-08 12
+413 413 2008-04-08 12
+494 494 2008-04-08 12
+207 207 2008-04-08 12
+199 199 2008-04-08 12
+466 466 2008-04-08 12
+208 208 2008-04-08 12
+174 174 2008-04-08 12
+399 399 2008-04-08 12
+396 396 2008-04-08 12
+247 247 2008-04-08 12
+417 417 2008-04-08 12
+489 489 2008-04-08 12
+162 162 2008-04-08 12
+377 377 2008-04-08 12
+397 397 2008-04-08 12
+309 309 2008-04-08 12
+365 365 2008-04-08 12
+266 266 2008-04-08 12
+439 439 2008-04-08 12
+342 342 2008-04-08 12
+367 367 2008-04-08 12
+325 325 2008-04-08 12
+167 167 2008-04-08 12
+195 195 2008-04-08 12
+475 475 2008-04-08 12
+17 17 2008-04-08 12
+113 113 2008-04-08 12
+155 155 2008-04-08 12
+203 203 2008-04-08 12
+339 339 2008-04-08 12
+0 0 2008-04-08 12
+455 455 2008-04-08 12
+128 128 2008-04-08 12
+311 311 2008-04-08 12
+316 316 2008-04-08 12
+57 57 2008-04-08 12
+302 302 2008-04-08 12
+205 205 2008-04-08 12
+149 149 2008-04-08 12
+438 438 2008-04-08 12
+345 345 2008-04-08 12
+129 129 2008-04-08 12
+170 170 2008-04-08 12
+20 20 2008-04-08 12
+489 489 2008-04-08 12
+157 157 2008-04-08 12
+378 378 2008-04-08 12
+221 221 2008-04-08 12
+92 92 2008-04-08 12
+111 111 2008-04-08 12
+47 47 2008-04-08 12
+72 72 2008-04-08 12
+4 4 2008-04-08 12
+280 280 2008-04-08 12
+35 35 2008-04-08 12
+427 427 2008-04-08 12
+277 277 2008-04-08 12
+208 208 2008-04-08 12
+356 356 2008-04-08 12
+399 399 2008-04-08 12
+169 169 2008-04-08 12
+382 382 2008-04-08 12
+498 498 2008-04-08 12
+125 125 2008-04-08 12
+386 386 2008-04-08 12
+437 437 2008-04-08 12
+469 469 2008-04-08 12
+192 192 2008-04-08 12
+286 286 2008-04-08 12
+187 187 2008-04-08 12
+176 176 2008-04-08 12
+54 54 2008-04-08 12
+459 459 2008-04-08 12
+51 51 2008-04-08 12
+138 138 2008-04-08 12
+103 103 2008-04-08 12
+239 239 2008-04-08 12
+213 213 2008-04-08 12
+216 216 2008-04-08 12
+430 430 2008-04-08 12
+278 278 2008-04-08 12
+176 176 2008-04-08 12
+289 289 2008-04-08 12
+221 221 2008-04-08 12
+65 65 2008-04-08 12
+318 318 2008-04-08 12
+332 332 2008-04-08 12
+311 311 2008-04-08 12
+275 275 2008-04-08 12
+137 137 2008-04-08 12
+241 241 2008-04-08 12
+83 83 2008-04-08 12
+333 333 2008-04-08 12
+180 180 2008-04-08 12
+284 284 2008-04-08 12
+12 12 2008-04-08 12
+230 230 2008-04-08 12
+181 181 2008-04-08 12
+67 67 2008-04-08 12
+260 260 2008-04-08 12
+404 404 2008-04-08 12
+384 384 2008-04-08 12
+489 489 2008-04-08 12
+353 353 2008-04-08 12
+373 373 2008-04-08 12
+272 272 2008-04-08 12
+138 138 2008-04-08 12
+217 217 2008-04-08 12
+84 84 2008-04-08 12
+348 348 2008-04-08 12
+466 466 2008-04-08 12
+58 58 2008-04-08 12
+8 8 2008-04-08 12
+411 411 2008-04-08 12
+230 230 2008-04-08 12
+208 208 2008-04-08 12
+348 348 2008-04-08 12
+24 24 2008-04-08 12
+463 463 2008-04-08 12
+431 431 2008-04-08 12
+179 179 2008-04-08 12
+172 172 2008-04-08 12
+42 42 2008-04-08 12
+129 129 2008-04-08 12
+158 158 2008-04-08 12
+119 119 2008-04-08 12
+496 496 2008-04-08 12
+0 0 2008-04-08 12
+322 322 2008-04-08 12
+197 197 2008-04-08 12
+468 468 2008-04-08 12
+393 393 2008-04-08 12
+454 454 2008-04-08 12
+100 100 2008-04-08 12
+298 298 2008-04-08 12
+199 199 2008-04-08 12
+191 191 2008-04-08 12
+418 418 2008-04-08 12
+96 96 2008-04-08 12
+26 26 2008-04-08 12
+165 165 2008-04-08 12
+327 327 2008-04-08 12
+230 230 2008-04-08 12
+205 205 2008-04-08 12
+120 120 2008-04-08 12
+131 131 2008-04-08 12
+51 51 2008-04-08 12
+404 404 2008-04-08 12
+43 43 2008-04-08 12
+436 436 2008-04-08 12
+156 156 2008-04-08 12
+469 469 2008-04-08 12
+468 468 2008-04-08 12
+308 308 2008-04-08 12
+95 95 2008-04-08 12
+196 196 2008-04-08 12
+288 288 2008-04-08 12
+481 481 2008-04-08 12
+457 457 2008-04-08 12
+98 98 2008-04-08 12
+282 282 2008-04-08 12
+197 197 2008-04-08 12
+187 187 2008-04-08 12
+318 318 2008-04-08 12
+318 318 2008-04-08 12
+409 409 2008-04-08 12
+470 470 2008-04-08 12
+137 137 2008-04-08 12
+369 369 2008-04-08 12
+316 316 2008-04-08 12
+169 169 2008-04-08 12
+413 413 2008-04-08 12
+85 85 2008-04-08 12
+77 77 2008-04-08 12
+0 0 2008-04-08 12
+490 490 2008-04-08 12
+87 87 2008-04-08 12
+364 364 2008-04-08 12
+179 179 2008-04-08 12
+118 118 2008-04-08 12
+134 134 2008-04-08 12
+395 395 2008-04-08 12
+282 282 2008-04-08 12
+138 138 2008-04-08 12
+238 238 2008-04-08 12
+419 419 2008-04-08 12
+15 15 2008-04-08 12
+118 118 2008-04-08 12
+72 72 2008-04-08 12
+90 90 2008-04-08 12
+307 307 2008-04-08 12
+19 19 2008-04-08 12
+435 435 2008-04-08 12
+10 10 2008-04-08 12
+277 277 2008-04-08 12
+273 273 2008-04-08 12
+306 306 2008-04-08 12
+224 224 2008-04-08 12
+309 309 2008-04-08 12
+389 389 2008-04-08 12
+327 327 2008-04-08 12
+242 242 2008-04-08 12
+369 369 2008-04-08 12
+392 392 2008-04-08 12
+272 272 2008-04-08 12
+331 331 2008-04-08 12
+401 401 2008-04-08 12
+242 242 2008-04-08 12
+452 452 2008-04-08 12
+177 177 2008-04-08 12
+226 226 2008-04-08 12
+5 5 2008-04-08 12
+497 497 2008-04-08 12
+402 402 2008-04-08 12
+396 396 2008-04-08 12
+317 317 2008-04-08 12
+395 395 2008-04-08 12
+58 58 2008-04-08 12
+35 35 2008-04-08 12
+336 336 2008-04-08 12
+95 95 2008-04-08 12
+11 11 2008-04-08 12
+168 168 2008-04-08 12
+34 34 2008-04-08 12
+229 229 2008-04-08 12
+233 233 2008-04-08 12
+143 143 2008-04-08 12
+472 472 2008-04-08 12
+322 322 2008-04-08 12
+498 498 2008-04-08 12
+160 160 2008-04-08 12
+195 195 2008-04-08 12
+42 42 2008-04-08 12
+321 321 2008-04-08 12
+430 430 2008-04-08 12
+119 119 2008-04-08 12
+489 489 2008-04-08 12
+458 458 2008-04-08 12
+78 78 2008-04-08 12
+76 76 2008-04-08 12
+41 41 2008-04-08 12
+223 223 2008-04-08 12
+492 492 2008-04-08 12
+149 149 2008-04-08 12
+449 449 2008-04-08 12
+218 218 2008-04-08 12
+228 228 2008-04-08 12
+138 138 2008-04-08 12
+453 453 2008-04-08 12
+30 30 2008-04-08 12
+209 209 2008-04-08 12
+64 64 2008-04-08 12
+468 468 2008-04-08 12
+76 76 2008-04-08 12
+74 74 2008-04-08 12
+342 342 2008-04-08 12
+69 69 2008-04-08 12
+230 230 2008-04-08 12
+33 33 2008-04-08 12
+368 368 2008-04-08 12
+103 103 2008-04-08 12
+296 296 2008-04-08 12
+113 113 2008-04-08 12
+216 216 2008-04-08 12
+367 367 2008-04-08 12
+344 344 2008-04-08 12
+167 167 2008-04-08 12
+274 274 2008-04-08 12
+219 219 2008-04-08 12
+239 239 2008-04-08 12
+485 485 2008-04-08 12
+116 116 2008-04-08 12
+223 223 2008-04-08 12
+256 256 2008-04-08 12
+263 263 2008-04-08 12
+70 70 2008-04-08 12
+487 487 2008-04-08 12
+480 480 2008-04-08 12
+401 401 2008-04-08 12
+288 288 2008-04-08 12
+191 191 2008-04-08 12
+5 5 2008-04-08 12
+244 244 2008-04-08 12
+438 438 2008-04-08 12
+128 128 2008-04-08 12
+467 467 2008-04-08 12
+432 432 2008-04-08 12
+202 202 2008-04-08 12
+316 316 2008-04-08 12
+229 229 2008-04-08 12
+469 469 2008-04-08 12
+463 463 2008-04-08 12
+280 280 2008-04-08 12
+2 2 2008-04-08 12
+35 35 2008-04-08 12
+283 283 2008-04-08 12
+331 331 2008-04-08 12
+235 235 2008-04-08 12
+80 80 2008-04-08 12
+44 44 2008-04-08 12
+193 193 2008-04-08 12
+321 321 2008-04-08 12
+335 335 2008-04-08 12
+104 104 2008-04-08 12
+466 466 2008-04-08 12
+366 366 2008-04-08 12
+175 175 2008-04-08 12
+403 403 2008-04-08 12
+483 483 2008-04-08 12
+53 53 2008-04-08 12
+105 105 2008-04-08 12
+257 257 2008-04-08 12
+406 406 2008-04-08 12
+409 409 2008-04-08 12
+190 190 2008-04-08 12
+406 406 2008-04-08 12
+401 401 2008-04-08 12
+114 114 2008-04-08 12
+258 258 2008-04-08 12
+90 90 2008-04-08 12
+203 203 2008-04-08 12
+262 262 2008-04-08 12
+348 348 2008-04-08 12
+424 424 2008-04-08 12
+12 12 2008-04-08 12
+396 396 2008-04-08 12
+201 201 2008-04-08 12
+217 217 2008-04-08 12
+164 164 2008-04-08 12
+431 431 2008-04-08 12
+454 454 2008-04-08 12
+478 478 2008-04-08 12
+298 298 2008-04-08 12
+125 125 2008-04-08 12
+431 431 2008-04-08 12
+164 164 2008-04-08 12
+424 424 2008-04-08 12
+187 187 2008-04-08 12
+382 382 2008-04-08 12
+5 5 2008-04-08 12
+70 70 2008-04-08 12
+397 397 2008-04-08 12
+480 480 2008-04-08 12
+291 291 2008-04-08 12
+24 24 2008-04-08 12
+351 351 2008-04-08 12
+255 255 2008-04-08 12
+104 104 2008-04-08 12
+70 70 2008-04-08 12
+163 163 2008-04-08 12
+438 438 2008-04-08 12
+119 119 2008-04-08 12
+414 414 2008-04-08 12
+200 200 2008-04-08 12
+491 491 2008-04-08 12
+237 237 2008-04-08 12
+439 439 2008-04-08 12
+360 360 2008-04-08 12
+248 248 2008-04-08 12
+479 479 2008-04-08 12
+305 305 2008-04-08 12
+417 417 2008-04-08 12
+199 199 2008-04-08 12
+444 444 2008-04-08 12
+120 120 2008-04-08 12
+429 429 2008-04-08 12
+169 169 2008-04-08 12
+443 443 2008-04-08 12
+323 323 2008-04-08 12
+325 325 2008-04-08 12
+277 277 2008-04-08 12
+230 230 2008-04-08 12
+478 478 2008-04-08 12
+178 178 2008-04-08 12
+468 468 2008-04-08 12
+310 310 2008-04-08 12
+317 317 2008-04-08 12
+333 333 2008-04-08 12
+493 493 2008-04-08 12
+460 460 2008-04-08 12
+207 207 2008-04-08 12
+249 249 2008-04-08 12
+265 265 2008-04-08 12
+480 480 2008-04-08 12
+83 83 2008-04-08 12
+136 136 2008-04-08 12
+353 353 2008-04-08 12
+172 172 2008-04-08 12
+214 214 2008-04-08 12
+462 462 2008-04-08 12
+233 233 2008-04-08 12
+406 406 2008-04-08 12
+133 133 2008-04-08 12
+175 175 2008-04-08 12
+189 189 2008-04-08 12
+454 454 2008-04-08 12
+375 375 2008-04-08 12
+401 401 2008-04-08 12
+421 421 2008-04-08 12
+407 407 2008-04-08 12
+384 384 2008-04-08 12
+256 256 2008-04-08 12
+26 26 2008-04-08 12
+134 134 2008-04-08 12
+67 67 2008-04-08 12
+384 384 2008-04-08 12
+379 379 2008-04-08 12
+18 18 2008-04-08 12
+462 462 2008-04-08 12
+492 492 2008-04-08 12
+100 100 2008-04-08 12
+298 298 2008-04-08 12
+9 9 2008-04-08 12
+341 341 2008-04-08 12
+498 498 2008-04-08 12
+146 146 2008-04-08 12
+458 458 2008-04-08 12
+362 362 2008-04-08 12
+186 186 2008-04-08 12
+285 285 2008-04-08 12
+348 348 2008-04-08 12
+167 167 2008-04-08 12
+18 18 2008-04-08 12
+273 273 2008-04-08 12
+183 183 2008-04-08 12
+281 281 2008-04-08 12
+344 344 2008-04-08 12
+97 97 2008-04-08 12
+469 469 2008-04-08 12
+315 315 2008-04-08 12
+84 84 2008-04-08 12
+28 28 2008-04-08 12
+37 37 2008-04-08 12
+448 448 2008-04-08 12
+152 152 2008-04-08 12
+348 348 2008-04-08 12
+307 307 2008-04-08 12
+194 194 2008-04-08 12
+414 414 2008-04-08 12
+477 477 2008-04-08 12
+222 222 2008-04-08 12
+126 126 2008-04-08 12
+90 90 2008-04-08 12
+169 169 2008-04-08 12
+403 403 2008-04-08 12
+400 400 2008-04-08 12
+200 200 2008-04-08 12
+97 97 2008-04-08 12
+238 238 2008-04-09 11
+86 86 2008-04-09 11
+311 311 2008-04-09 11
+27 27 2008-04-09 11
+165 165 2008-04-09 11
+409 409 2008-04-09 11
+255 255 2008-04-09 11
+278 278 2008-04-09 11
+98 98 2008-04-09 11
+484 484 2008-04-09 11
+265 265 2008-04-09 11
+193 193 2008-04-09 11
+401 401 2008-04-09 11
+150 150 2008-04-09 11
+273 273 2008-04-09 11
+224 224 2008-04-09 11
+369 369 2008-04-09 11
+66 66 2008-04-09 11
+128 128 2008-04-09 11
+213 213 2008-04-09 11
+146 146 2008-04-09 11
+406 406 2008-04-09 11
+429 429 2008-04-09 11
+374 374 2008-04-09 11
+152 152 2008-04-09 11
+469 469 2008-04-09 11
+145 145 2008-04-09 11
+495 495 2008-04-09 11
+37 37 2008-04-09 11
+327 327 2008-04-09 11
+281 281 2008-04-09 11
+277 277 2008-04-09 11
+209 209 2008-04-09 11
+15 15 2008-04-09 11
+82 82 2008-04-09 11
+403 403 2008-04-09 11
+166 166 2008-04-09 11
+417 417 2008-04-09 11
+430 430 2008-04-09 11
+252 252 2008-04-09 11
+292 292 2008-04-09 11
+219 219 2008-04-09 11
+287 287 2008-04-09 11
+153 153 2008-04-09 11
+193 193 2008-04-09 11
+338 338 2008-04-09 11
+446 446 2008-04-09 11
+459 459 2008-04-09 11
+394 394 2008-04-09 11
+237 237 2008-04-09 11
+482 482 2008-04-09 11
+174 174 2008-04-09 11
+413 413 2008-04-09 11
+494 494 2008-04-09 11
+207 207 2008-04-09 11
+199 199 2008-04-09 11
+466 466 2008-04-09 11
+208 208 2008-04-09 11
+174 174 2008-04-09 11
+399 399 2008-04-09 11
+396 396 2008-04-09 11
+247 247 2008-04-09 11
+417 417 2008-04-09 11
+489 489 2008-04-09 11
+162 162 2008-04-09 11
+377 377 2008-04-09 11
+397 397 2008-04-09 11
+309 309 2008-04-09 11
+365 365 2008-04-09 11
+266 266 2008-04-09 11
+439 439 2008-04-09 11
+342 342 2008-04-09 11
+367 367 2008-04-09 11
+325 325 2008-04-09 11
+167 167 2008-04-09 11
+195 195 2008-04-09 11
+475 475 2008-04-09 11
+17 17 2008-04-09 11
+113 113 2008-04-09 11
+155 155 2008-04-09 11
+203 203 2008-04-09 11
+339 339 2008-04-09 11
+0 0 2008-04-09 11
+455 455 2008-04-09 11
+128 128 2008-04-09 11
+311 311 2008-04-09 11
+316 316 2008-04-09 11
+57 57 2008-04-09 11
+302 302 2008-04-09 11
+205 205 2008-04-09 11
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 400
+ 10
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Offset of rows: 400
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: kv1.txt
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11/kv1.txt [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+70 70 2008-04-08 11
+163 163 2008-04-08 11
+438 438 2008-04-08 11
+119 119 2008-04-08 11
+414 414 2008-04-08 11
+200 200 2008-04-08 11
+491 491 2008-04-08 11
+237 237 2008-04-08 11
+439 439 2008-04-08 11
+360 360 2008-04-08 11
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 490
+ 10
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Offset of rows: 490
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [srcpart]
+ /srcpart/ds=2008-04-08/hr=12 [srcpart]
+ /srcpart/ds=2008-04-09/hr=11 [srcpart]
+ /srcpart/ds=2008-04-09/hr=12 [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+414 414 2008-04-08 11
+477 477 2008-04-08 11
+222 222 2008-04-08 11
+126 126 2008-04-08 11
+90 90 2008-04-08 11
+169 169 2008-04-08 11
+403 403 2008-04-08 11
+400 400 2008-04-08 11
+200 200 2008-04-08 11
+97 97 2008-04-08 11
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ srcpart
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ key
+ TOK_SELEXPR
+ TOK_FUNCTION
+ substr
+ .
+ TOK_TABLE_OR_COL
+ srcpart
+ value
+ 5
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ ds
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ hr
+ TOK_LIMIT
+ 490
+ 20
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Offset of rows: 490
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=11
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+#### A masked pattern was here ####
+ Partition
+ base file name: hr=12
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ numFiles 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [srcpart]
+ /srcpart/ds=2008-04-08/hr=12 [srcpart]
+ /srcpart/ds=2008-04-09/hr=11 [srcpart]
+ /srcpart/ds=2008-04-09/hr=12 [srcpart]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+
<TRUNCATED>