You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/07/30 22:36:34 UTC
[1/3] hive git commit: HIVE-20210 : Simple Fetch optimizer should
lead to MapReduce when filter on non-partition column and conversion is
minimal (Jeffery Yan via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 6fa9f6339 -> 65f02d2f9
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
index b256f4f..889f23c 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
@@ -194,14 +194,55 @@ OPTIMIZED SQL: SELECT CAST('145' AS STRING) AS `key`, `value`, CAST('1' AS STRIN
FROM `default`.`fact_daily_n3`
WHERE `ds` = '1' AND `hr` = '1' AND `key` = '145'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n3
+ filterExpr: ((ds = '1') and (hr = '1') and (key = '145')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '145') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '145' (type: string), value (type: string), '1' (type: string), '1' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -248,21 +289,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n3
name: default.fact_daily_n3
+ Truncated Path -> Alias:
+ /fact_daily_n3/ds=1/hr=1 [fact_daily_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n3
- filterExpr: ((ds = '1') and (hr = '1') and (key = '145')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '145') (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '145' (type: string), value (type: string), '1' (type: string), '1' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from fact_daily_n3 where ds = '1' and hr='1' and key='145'
PREHOOK: type: QUERY
@@ -313,14 +347,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`fact_daily_n3`
WHERE `ds` = '1' AND `hr` = '2' AND `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n3
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), '1' (type: string), '2' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -367,21 +442,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n3
name: default.fact_daily_n3
+ Truncated Path -> Alias:
+ /fact_daily_n3/ds=1/hr=2/key=484/value=val_484 [fact_daily_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n3
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), '1' (type: string), '2' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484')
PREHOOK: type: QUERY
@@ -404,14 +472,55 @@ OPTIMIZED SQL: SELECT CAST('327' AS STRING) AS `key`, CAST('val_327' AS STRING)
FROM `default`.`fact_daily_n3`
WHERE `ds` = '1' AND `hr` = '3' AND `key` = '327' AND `value` = 'val_327'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n3
+ filterExpr: ((key = '327') and (value = 'val_327')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '327') and (value = 'val_327')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '327' (type: string), 'val_327' (type: string), '1' (type: string), '3' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_327
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -458,21 +567,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n3
name: default.fact_daily_n3
+ Truncated Path -> Alias:
+ /fact_daily_n3/ds=1/hr=3/key=327/value=val_327 [fact_daily_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n3
- filterExpr: ((key = '327') and (value = 'val_327')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '327') and (value = 'val_327')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '327' (type: string), 'val_327' (type: string), '1' (type: string), '3' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327')
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
index d4304b1..dcff8a5 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
@@ -135,14 +135,55 @@ OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x`
FROM `default`.`fact_daily_n4`
WHERE `ds` = '1' AND `x` = 484
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n4
+ filterExpr: ((ds = '1') and (x = 484)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (x = 484) (type: boolean)
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 484 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: x=484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -186,21 +227,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n4
name: default.fact_daily_n4
+ Truncated Path -> Alias:
+ /fact_tz/ds=1/x=484 [fact_daily_n4]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n4
- filterExpr: ((ds = '1') and (x = 484)) (type: boolean)
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (x = 484) (type: boolean)
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: 484 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT x FROM fact_daily_n4 WHERE ds='1' and x=484
PREHOOK: type: QUERY
@@ -221,14 +255,55 @@ OPTIMIZED SQL: SELECT CAST(495 AS INTEGER) AS `x`
FROM `default`.`fact_daily_n4`
WHERE `ds` = '1' AND `x` = 495
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n4
+ filterExpr: ((ds = '1') and (x = 495)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (x = 495) (type: boolean)
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 495 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -272,21 +347,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n4
name: default.fact_daily_n4
+ Truncated Path -> Alias:
+ /fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n4]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n4
- filterExpr: ((ds = '1') and (x = 495)) (type: boolean)
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (x = 495) (type: boolean)
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: 495 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT x FROM fact_daily_n4 WHERE ds='1' and x=495
PREHOOK: type: QUERY
@@ -307,14 +375,55 @@ OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `x`
FROM `default`.`fact_daily_n4`
WHERE `ds` = '1' AND `x` = 1
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n4
+ filterExpr: ((ds = '1') and (x = 1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (x = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -358,21 +467,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n4
name: default.fact_daily_n4
+ Truncated Path -> Alias:
+ /fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n4]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n4
- filterExpr: ((ds = '1') and (x = 1)) (type: boolean)
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (x = 1) (type: boolean)
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: 1 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 80 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT x FROM fact_daily_n4 WHERE ds='1' and x=1
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
index 465805a..3251dc4 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
@@ -138,14 +138,55 @@ OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x`
FROM `default`.`fact_daily_n5`
WHERE `ds` = '1' AND `x` = 484
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n5
+ filterExpr: ((ds = '1') and (x = 484)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (x = 484) (type: boolean)
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 484 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: x=484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -189,21 +230,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n5
name: default.fact_daily_n5
+ Truncated Path -> Alias:
+ /fact_tz/ds=1/x=484 [fact_daily_n5]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n5
- filterExpr: ((ds = '1') and (x = 484)) (type: boolean)
- Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (x = 484) (type: boolean)
- Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: 484 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select x from (select * from fact_daily_n5 where ds = '1') subq where x = 484
PREHOOK: type: QUERY
@@ -224,14 +258,55 @@ OPTIMIZED SQL: SELECT CAST(484 AS INTEGER) AS `x1`, `y` AS `y1`
FROM `default`.`fact_daily_n5`
WHERE `ds` = '1' AND `x` = 484
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n5
+ filterExpr: ((ds = '1') and (x = 484)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (x = 484) (type: boolean)
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 484 (type: int), y (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: x=484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -275,21 +350,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n5
name: default.fact_daily_n5
+ Truncated Path -> Alias:
+ /fact_tz/ds=1/x=484 [fact_daily_n5]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n5
- filterExpr: ((ds = '1') and (x = 484)) (type: boolean)
- Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (x = 484) (type: boolean)
- Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: 484 (type: int), y (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 240 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select x1, y1 from(select x as x1, y as y1 from fact_daily_n5 where ds ='1') subq where x1 = 484
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
index bab5179..a61d9d8 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
@@ -158,14 +158,55 @@ OPTIMIZED SQL: SELECT `x`
FROM `default`.`fact_daily_n0`
WHERE `ds` = '1' AND `x` <> 86
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n0
+ filterExpr: ((ds = '1') and (x <> 86)) (type: boolean)
+ Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (x <> 86) (type: boolean)
+ Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: x (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -209,21 +250,108 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n0
name: default.fact_daily_n0
+#### A masked pattern was here ####
+ Partition
+ base file name: x=238
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns x,y,z
+ columns.comments
+ columns.types int:string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n0
+ numFiles 3
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct fact_daily_n0 { i32 x, string y, string z}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 117
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns x,y,z
+ columns.comments
+ columns.types int:string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n0
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct fact_daily_n0 { i32 x, string y, string z}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily_n0
+ name: default.fact_daily_n0
+#### A masked pattern was here ####
+ Partition
+ base file name: x=484
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns x,y,z
+ columns.comments
+ columns.types int:string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n0
+ numFiles 3
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct fact_daily_n0 { i32 x, string y, string z}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 117
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ EXTERNAL TRUE
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns x,y,z
+ columns.comments
+ columns.types int:string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n0
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct fact_daily_n0 { i32 x, string y, string z}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily_n0
+ name: default.fact_daily_n0
+ Truncated Path -> Alias:
+ /fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n0]
+ /fact_tz/ds=1/x=238 [fact_daily_n0]
+ /fact_tz/ds=1/x=484 [fact_daily_n0]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n0
- filterExpr: ((ds = '1') and (x <> 86)) (type: boolean)
- Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (x <> 86) (type: boolean)
- Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: x (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 1170 Basic stats: PARTIAL Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT x FROM fact_daily_n0 WHERE ds='1' and not (x = 86)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/nonmr_fetch.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out
index cd8d78b..366dc1e 100644
--- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out
+++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out
@@ -127,33 +127,109 @@ POSTHOOK: Input: default@src
278
98
484
+PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) > 100.0D)) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) > 100.0D) (type: boolean)
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from srcpart where ds='2008-04-08' AND key > 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND key > 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+238 val_238 2008-04-08 11
+311 val_311 2008-04-08 11
+165 val_165 2008-04-08 11
+409 val_409 2008-04-08 11
+255 val_255 2008-04-08 11
+278 val_278 2008-04-08 11
+484 val_484 2008-04-08 11
+265 val_265 2008-04-08 11
+193 val_193 2008-04-08 11
+401 val_401 2008-04-08 11
PREHOOK: query: explain select * from srcpart where key > 100 limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from srcpart where key > 100 limit 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) > 100.0D) (type: boolean)
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
- TableScan
- alias: srcpart
- filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (UDFToDouble(key) > 100.0D) (type: boolean)
- Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from srcpart where key > 100 limit 10
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/ppr_pushdown3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppr_pushdown3.q.out b/ql/src/test/results/clientpositive/ppr_pushdown3.q.out
index 56e90d7..f802ff7 100644
--- a/ql/src/test/results/clientpositive/ppr_pushdown3.q.out
+++ b/ql/src/test/results/clientpositive/ppr_pushdown3.q.out
@@ -3,25 +3,38 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from srcpart where key < 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: srcpart
- filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from srcpart where key < 10
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
index 232d188..3f6efd9 100644
--- a/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
+++ b/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
@@ -6,14 +6,54 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
FROM `default`.`srcpart`
WHERE RAND(1) < 0.1 AND `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D) and (rand(1) < 0.1D)) (type: boolean)
+ Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -60,20 +100,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=12 [a]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D) and (rand(1) < 0.1D)) (type: boolean)
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
PREHOOK: type: QUERY
@@ -98,14 +132,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
FROM `default`.`srcpart`
WHERE `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -152,21 +227,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=12 [a]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: a
- filterExpr: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out b/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out
index 27d21f3..60e5cd8 100644
--- a/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out
+++ b/ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out
@@ -57,14 +57,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value`, CAST('1' AS STRIN
FROM `default`.`test_tab_n3`
WHERE `part` = '1' AND `key` = '484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_tab_n3
+ filterExpr: ((part = '1') and (key = '484')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '484') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), value (type: string), '1' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: key=484
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -109,21 +150,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.test_tab_n3
name: default.test_tab_n3
+ Truncated Path -> Alias:
+ /test_tab_n3/part=1/key=484 [test_tab_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: test_tab_n3
- filterExpr: ((part = '1') and (key = '484')) (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '484') (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), value (type: string), '1' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT * FROM test_tab_n3 WHERE part = '1' AND key = '484'
PREHOOK: type: QUERY
@@ -144,14 +178,55 @@ OPTIMIZED SQL: SELECT CAST('0' AS STRING) AS `key`, `value`, CAST('1' AS STRING)
FROM `default`.`test_tab_n3`
WHERE `part` = '1' AND `key` = '0'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test_tab_n3
+ filterExpr: ((part = '1') and (key = '0')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '0') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '0' (type: string), value (type: string), '1' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -196,21 +271,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.test_tab_n3
name: default.test_tab_n3
+ Truncated Path -> Alias:
+ /test_tab_n3/part=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [test_tab_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: test_tab_n3
- filterExpr: ((part = '1') and (key = '0')) (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '0') (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '0' (type: string), value (type: string), '1' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT * FROM test_tab_n3 WHERE part = '1' AND key = '0'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/union_view.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_view.q.out b/ql/src/test/results/clientpositive/union_view.q.out
index ba98ef0..8d844b8 100644
--- a/ql/src/test/results/clientpositive/union_view.q.out
+++ b/ql/src/test/results/clientpositive/union_view.q.out
@@ -23,67 +23,106 @@ POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@src_union_3_n0
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_union_1_n0
+ filterExpr: ((key = 86) and (ds = '1')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 86) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 86 (type: int), value (type: string), '1' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: src_union_1_n0
- filterExpr: ((key = 86) and (ds = '1')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key = 86) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 86 (type: int), value (type: string), '1' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_union_2_n0
+ filterExpr: ((key = 86) and (ds = '2')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 86) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 86 (type: int), value (type: string), '2' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: src_union_2_n0
- filterExpr: ((key = 86) and (ds = '2')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key = 86) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 86 (type: int), value (type: string), '2' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_union_3_n0
+ filterExpr: ((key = 86) and (ds = '3')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 86) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 86 (type: int), value (type: string), '3' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: src_union_3_n0
- filterExpr: ((key = 86) and (ds = '3')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key = 86) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 86 (type: int), value (type: string), '3' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
86 val_86 1
86 val_86 2
[3/3] hive git commit: HIVE-20210 : Simple Fetch optimizer should
lead to MapReduce when filter on non-partition column and conversion is
minimal (Jeffery Yan via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-20210 : Simple Fetch optimizer should lead to MapReduce when filter on non-partition column and conversion is minimal (Jeffery Yan via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/65f02d2f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/65f02d2f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/65f02d2f
Branch: refs/heads/master
Commit: 65f02d2f99b990cb28fd6a832fa3425042e60a04
Parents: 6fa9f63
Author: Jeffrey(Xilang) Yan <xi...@gmail.com>
Authored: Mon Jul 30 15:35:56 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon Jul 30 15:35:56 2018 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/SimpleFetchOptimizer.java | 31 +-
.../test/queries/clientpositive/nonmr_fetch.q | 4 +
.../clientpositive/annotate_stats_part.q.out | 117 ++++---
.../clientpositive/cbo_rp_simple_select.q.out | 232 +++++++++-----
.../clientpositive/cbo_simple_select.q.out | 232 +++++++++-----
.../clientpositive/filter_in_or_dup.q.out | 117 ++++---
.../test/results/clientpositive/input42.q.out | 148 ++++++---
.../results/clientpositive/input_part9.q.out | 75 +++--
.../clientpositive/list_bucket_dml_1.q.out | 72 +++--
.../clientpositive/list_bucket_dml_11.q.out | 72 +++--
.../clientpositive/list_bucket_dml_12.q.out | 144 ++++++---
.../clientpositive/list_bucket_dml_13.q.out | 72 +++--
.../clientpositive/list_bucket_dml_2.q.out | 72 +++--
.../clientpositive/list_bucket_dml_3.q.out | 72 +++--
.../clientpositive/list_bucket_dml_4.q.out | 72 +++--
.../clientpositive/list_bucket_dml_5.q.out | 75 +++--
.../clientpositive/list_bucket_dml_6.q.out | 75 +++--
.../clientpositive/list_bucket_dml_7.q.out | 75 +++--
.../clientpositive/list_bucket_dml_8.q.out | 75 +++--
.../clientpositive/list_bucket_dml_9.q.out | 72 +++--
.../list_bucket_query_multiskew_1.q.out | 288 ++++++++++++-----
.../list_bucket_query_multiskew_2.q.out | 316 +++++++++++++++----
.../list_bucket_query_multiskew_3.q.out | 216 +++++++++----
.../list_bucket_query_oneskew_1.q.out | 216 +++++++++----
.../list_bucket_query_oneskew_2.q.out | 144 ++++++---
.../list_bucket_query_oneskew_3.q.out | 166 ++++++++--
.../results/clientpositive/nonmr_fetch.q.out | 108 ++++++-
.../results/clientpositive/ppr_pushdown3.q.out | 39 ++-
.../clientpositive/rand_partitionpruner3.q.out | 142 ++++++---
.../truncate_column_list_bucket.q.out | 144 ++++++---
.../results/clientpositive/union_view.q.out | 117 ++++---
31 files changed, 2814 insertions(+), 986 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index ffd47a2..89f6ee1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -211,11 +211,38 @@ public class SimpleFetchOptimizer extends Transform {
bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions();
}
}
- if (!aggressive && !bypassFilter) {
+
+ boolean onlyPruningFilter = bypassFilter;
+ Operator<?> op = ts;
+ while (onlyPruningFilter) {
+ if (op instanceof FileSinkOperator || op.getChildOperators() == null) {
+ break;
+ } else if (op.getChildOperators().size() != 1) {
+ onlyPruningFilter = false;
+ break;
+ } else {
+ op = op.getChildOperators().get(0);
+ }
+
+ if (op instanceof FilterOperator) {
+ ExprNodeDesc predicate = ((FilterOperator) op).getConf().getPredicate();
+ if (predicate instanceof ExprNodeConstantDesc
+ && "boolean".equals(predicate.getTypeInfo().getTypeName())) {
+ continue;
+ } else if (PartitionPruner.onlyContainsPartnCols(table, predicate)) {
+ continue;
+ } else {
+ onlyPruningFilter = false;
+ }
+ }
+ }
+
+ if (!aggressive && !onlyPruningFilter) {
return null;
}
+
PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts);
- FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter);
+ FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, onlyPruningFilter);
return checkOperators(fetch, aggressive, bypassFilter);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/queries/clientpositive/nonmr_fetch.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/nonmr_fetch.q b/ql/src/test/queries/clientpositive/nonmr_fetch.q
index 1b5ab56..5b7e61e 100644
--- a/ql/src/test/queries/clientpositive/nonmr_fetch.q
+++ b/ql/src/test/queries/clientpositive/nonmr_fetch.q
@@ -15,6 +15,10 @@ select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
explain select key from src limit 10;
select key from src limit 10;
+-- negative, filter on partition column and non-partition column
+explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10;
+select * from srcpart where ds='2008-04-08' AND key > 100 limit 10;
+
-- negative, filter on non-partition column
explain select * from srcpart where key > 100 limit 10;
select * from srcpart where key > 100 limit 10;
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 29ef214..bafc6de 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -441,73 +441,112 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select locid from loc_orc_n4 where locid>0 and year='2001'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc_n4
+ filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid > 0) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: locid (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: loc_orc_n4
- filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: locid (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001'
PREHOOK: type: QUERY
POSTHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc_n4
+ filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid > 0) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: locid (type: int), '2001' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: loc_orc_n4
- filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: locid (type: int), '2001' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001'
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc_n4
+ filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid > 0) (type: boolean)
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: locid (type: int), '2001' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: loc_orc_n4
- filterExpr: ((locid > 0) and (year = '2001')) (type: boolean)
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: locid (type: int), '2001' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
index d12b5f6..2e7d796 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
@@ -746,25 +746,38 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
@@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = (2 * c_int)) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = (2 * c_int)) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- c_int is 0
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
@@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
@@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+ Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = null) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = null) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
@@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- no rewrite
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
@@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int) IN (c_int, 0) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: key, value, c_int, c_float, c_boolean, dt
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int) IN (c_int, 0) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int) IN (c_int, 0) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- no rewrite
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/cbo_simple_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
index 588d924..33f0e71 100644
--- a/ql/src/test/results/clientpositive/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
@@ -746,25 +746,38 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
@@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = (2 * c_int)) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = (2 * c_int)) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- c_int is 0
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
@@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = c_int) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = c_int) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
@@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int = null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int = null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int = null) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int = null) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
@@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- no rewrite
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
@@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t2
+ filterExpr: (c_int) IN (c_int, 0) (type: boolean)
+ Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_int) IN (c_int, 0) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: cbo_t2
- filterExpr: (c_int) IN (c_int, 0) (type: boolean)
- Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (c_int) IN (c_int, 0) (type: boolean)
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE
- ListSink
+ ListSink
PREHOOK: query: -- no rewrite
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
index b821717..b50027d 100644
--- a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
+++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -11,25 +11,38 @@ WHERE (f.key = '1' OR f.key='2')
AND f.key IN ('1', '2')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ filterExpr: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: f
- filterExpr: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT f.key
@@ -44,25 +57,38 @@ WHERE (f.key = '1' OR f.key = '2')
AND f.key IN ('1', '2', '3')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ filterExpr: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: f
- filterExpr: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: EXPLAIN
SELECT f.key
@@ -77,23 +103,36 @@ WHERE (f.key = '1' OR f.key='2' OR f.key='3')
AND f.key IN ('1', '2')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ filterExpr: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: f
- filterExpr: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key) IN ('1', '2') (type: boolean)
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/input42.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input42.q.out b/ql/src/test/results/clientpositive/input42.q.out
index df98800..98c2fd0 100644
--- a/ql/src/test/results/clientpositive/input42.q.out
+++ b/ql/src/test/results/clientpositive/input42.q.out
@@ -1143,14 +1143,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
FROM `default`.`srcpart`
WHERE `ds` = '2008-04-08' AND `key` < 200
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (UDFToDouble(key) < 200.0D) (type: boolean)
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1197,7 +1238,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+#### A masked pattern was here ####
Partition
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1244,21 +1287,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [a]
+ /srcpart/ds=2008-04-08/hr=12 [a]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: a
- filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (UDFToDouble(key) < 200.0D) (type: boolean)
- Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200
PREHOOK: type: QUERY
@@ -1660,14 +1697,54 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
FROM `default`.`srcpart`
WHERE `ds` = '2008-04-08' AND RAND(100) < 0.1
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (rand(100) < 0.1D) (type: boolean)
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1714,7 +1791,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+#### A masked pattern was here ####
Partition
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1761,20 +1840,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [a]
+ /srcpart/ds=2008-04-08/hr=12 [a]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: a
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (rand(100) < 0.1D) (type: boolean)
- Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/input_part9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input_part9.q.out b/ql/src/test/results/clientpositive/input_part9.q.out
index 9440167..5ecaeb1 100644
--- a/ql/src/test/results/clientpositive/input_part9.q.out
+++ b/ql/src/test/results/clientpositive/input_part9.q.out
@@ -8,14 +8,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
FROM `default`.`srcpart`
WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: x
+ filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -62,7 +103,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+#### A masked pattern was here ####
Partition
+ base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -109,21 +152,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
+ Truncated Path -> Alias:
+ /srcpart/ds=2008-04-08/hr=11 [x]
+ /srcpart/ds=2008-04-08/hr=12 [x]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: x
- filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT x.* FROM SRCPART x WHERE key IS NOT NULL AND ds = '2008-04-08'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
index d13edd6..226e778 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
@@ -409,14 +409,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value`
FROM `default`.`list_bucketing_dynamic_part_n0`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_dynamic_part_n0
+ filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '484') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: key=484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -463,21 +504,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.list_bucketing_dynamic_part_n0
name: default.list_bucketing_dynamic_part_n0
+ Truncated Path -> Alias:
+ /list_bucketing_dynamic_part_n0/ds=2008-04-08/hr=11/key=484 [list_bucketing_dynamic_part_n0]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_dynamic_part_n0
- filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '484') (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select key, value from list_bucketing_dynamic_part_n0 where ds='2008-04-08' and hr='11' and key = "484"
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
index 44b712b..11f7dc1 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
@@ -290,14 +290,55 @@ OPTIMIZED SQL: SELECT `key`, CAST('val_466' AS STRING) AS `value`
FROM `default`.`list_bucketing_static_part_n3`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `value` = 'val_466'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_static_part_n3
+ filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 'val_466')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (value = 'val_466') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), 'val_466' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_466
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -344,21 +385,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_static_part_n3
name: default.list_bucketing_static_part_n3
+ Truncated Path -> Alias:
+ /list_bucketing_static_part_n3/ds=2008-04-08/hr=11/value=val_466 [list_bucketing_static_part_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_static_part_n3
- filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 'val_466')) (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (value = 'val_466') (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), 'val_466' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select key, value from list_bucketing_static_part_n3 where ds='2008-04-08' and hr='11' and value = "val_466"
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
index f5e643e..f1c20e5 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
@@ -298,14 +298,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val
FROM `default`.`list_bucketing_mul_col_n0`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '466' AND `col4` = 'val_466'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_mul_col_n0
+ filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+ columns.types string:string:string:string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: col4=val_466
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -352,21 +393,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col_n0
name: default.list_bucketing_mul_col_n0
+ Truncated Path -> Alias:
+ /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col_n0]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_mul_col_n0
- filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
- Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
@@ -395,14 +429,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('382' AS STRING) AS `col2`, `col3`, CAST('val
FROM `default`.`list_bucketing_mul_col_n0`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '382' AND `col4` = 'val_382'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_mul_col_n0
+ filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+ columns.types string:string:string:string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -449,21 +524,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col_n0
name: default.list_bucketing_mul_col_n0
+ Truncated Path -> Alias:
+ /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col_n0]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_mul_col_n0
- filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
- Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
[2/3] hive git commit: HIVE-20210 : Simple Fetch optimizer should
lead to MapReduce when filter on non-partition column and conversion is
minimal (Jeffery Yan via Ashutosh Chauhan)
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
index a43a0b7..b9f658a 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
@@ -298,14 +298,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val
FROM `default`.`list_bucketing_mul_col`
WHERE `ds` = '2008-04-08' AND `hr` = '2013-01-23+18:00:99' AND `col2` = '466' AND `col4` = 'val_466'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_mul_col
+ filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+ columns.types string:string:string:string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: col4=val_466
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -352,21 +393,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col
name: default.list_bucketing_mul_col
+ Truncated Path -> Alias:
+ /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_mul_col
- filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
- Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_mul_col
where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466"
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
index 5750fdd..bd8e215 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
@@ -365,14 +365,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`list_bucketing_static_part_n4`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_static_part_n4
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_484
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -419,21 +460,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_static_part_n4
name: default.list_bucketing_static_part_n4
+ Truncated Path -> Alias:
+ /list_bucketing_static_part_n4/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part_n4]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_static_part_n4
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_static_part_n4 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
index affbdf5..5ba8948 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
@@ -357,14 +357,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value`
FROM `default`.`list_bucketing_static_part_n1`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_static_part_n1
+ filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '484') (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: key=484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -411,21 +452,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.list_bucketing_static_part_n1
name: default.list_bucketing_static_part_n1
+ Truncated Path -> Alias:
+ /list_bucketing_static_part_n1/ds=2008-04-08/hr=11/key=484 [list_bucketing_static_part_n1]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_static_part_n1
- filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '484') (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select key, value from list_bucketing_static_part_n1 where ds='2008-04-08' and hr='11' and key = "484"
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
index 4ddd112..520d48e 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
@@ -808,14 +808,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`list_bucketing_static_part_n2`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_static_part_n2
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_484
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -862,21 +903,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_static_part_n2
name: default.list_bucketing_static_part_n2
+ Truncated Path -> Alias:
+ /list_bucketing_static_part_n2/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part_n2]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_static_part_n2
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_static_part_n2 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
index 06e2a45..47c9e24 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
@@ -414,14 +414,55 @@ OPTIMIZED SQL: SELECT CAST('103' AS STRING) AS `key`, CAST('val_103' AS STRING)
FROM `default`.`list_bucketing_dynamic_part_n1`
WHERE `ds` = '2008-04-08' AND `key` = '103' AND `value` = 'val_103'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_dynamic_part_n1
+ filterExpr: ((key = '103') and (value = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '103') and (value = 'val_103')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_103
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -468,7 +509,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.list_bucketing_dynamic_part_n1
name: default.list_bucketing_dynamic_part_n1
+#### A masked pattern was here ####
Partition
+ base file name: value=val_103
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -515,21 +558,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.list_bucketing_dynamic_part_n1
name: default.list_bucketing_dynamic_part_n1
+ Truncated Path -> Alias:
+ /list_bucketing_dynamic_part_n1/ds=2008-04-08/hr=11/key=103/value=val_103 [list_bucketing_dynamic_part_n1]
+ /list_bucketing_dynamic_part_n1/ds=2008-04-08/hr=12/key=103/value=val_103 [list_bucketing_dynamic_part_n1]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_dynamic_part_n1
- filterExpr: ((key = '103') and (value = 'val_103')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '103') and (value = 'val_103')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part_n1 where ds='2008-04-08' and key = "103" and value ="val_103"
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
index a3089a7..aad1458 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -904,14 +904,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`list_bucketing_dynamic_part_n3`
WHERE `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_dynamic_part_n3
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=a1
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -958,7 +999,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_dynamic_part_n3
name: default.list_bucketing_dynamic_part_n3
+#### A masked pattern was here ####
Partition
+ base file name: value=val_484
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -1005,21 +1048,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_dynamic_part_n3
name: default.list_bucketing_dynamic_part_n3
+ Truncated Path -> Alias:
+ /list_bucketing_dynamic_part_n3/ds=2008-04-08/hr=a1 [list_bucketing_dynamic_part_n3]
+ /list_bucketing_dynamic_part_n3/ds=2008-04-08/hr=b1/key=484/value=val_484 [list_bucketing_dynamic_part_n3]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_dynamic_part_n3
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_dynamic_part_n3 where key = '484' and value = 'val_484'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
index faef7a9..7232e79 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
@@ -904,14 +904,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`list_bucketing_dynamic_part`
WHERE `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_dynamic_part
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=a1
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -958,7 +999,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_dynamic_part
name: default.list_bucketing_dynamic_part
+#### A masked pattern was here ####
Partition
+ base file name: hr=b1
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -1005,21 +1048,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_dynamic_part
name: default.list_bucketing_dynamic_part
+ Truncated Path -> Alias:
+ /list_bucketing_dynamic_part/ds=2008-04-08/hr=a1 [list_bucketing_dynamic_part]
+ /list_bucketing_dynamic_part/ds=2008-04-08/hr=b1 [list_bucketing_dynamic_part]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_dynamic_part
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
index eac6407..337f9dc 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
@@ -464,14 +464,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`list_bucketing_dynamic_part_n2`
WHERE `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_dynamic_part_n2
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: hr=a1
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -518,7 +559,9 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_dynamic_part_n2
name: default.list_bucketing_dynamic_part_n2
+#### A masked pattern was here ####
Partition
+ base file name: hr=b1
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -564,21 +607,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_dynamic_part_n2
name: default.list_bucketing_dynamic_part_n2
+ Truncated Path -> Alias:
+ /list_bucketing_dynamic_part_n2/ds=2008-04-08/hr=a1 [list_bucketing_dynamic_part_n2]
+ /list_bucketing_dynamic_part_n2/ds=2008-04-08/hr=b1 [list_bucketing_dynamic_part_n2]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_dynamic_part_n2
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_dynamic_part_n2 where key = '484' and value = 'val_484'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
index 1e1fc13..fbd4fde 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
@@ -808,14 +808,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING)
FROM `default`.`list_bucketing_static_part_n0`
WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: list_bucketing_static_part_n0
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string:string:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: key=484
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -862,21 +903,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_static_part_n0
name: default.list_bucketing_static_part_n0
+ Truncated Path -> Alias:
+ /list_bucketing_static_part_n0/ds=2008-04-08/hr=11/key=484 [list_bucketing_static_part_n0]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: list_bucketing_static_part_n0
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: select * from list_bucketing_static_part_n0 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
index a09e007..e324cab 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
@@ -84,14 +84,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`
FROM `default`.`fact_daily`
WHERE `ds` = '1' AND `hr` = '4' AND `key` = '484' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily
+ filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '484' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -138,21 +179,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/key=484/value=val_484 [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily
- filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '484') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '484' (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484')
PREHOOK: type: QUERY
@@ -173,14 +207,55 @@ OPTIMIZED SQL: SELECT CAST('238' AS STRING) AS `key`, CAST('val_238' AS STRING)
FROM `default`.`fact_daily`
WHERE `ds` = '1' AND `hr` = '4' AND `key` = '238' AND `value` = 'val_238'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily
+ filterExpr: ((key = '238') and (value = 'val_238')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((key = '238') and (value = 'val_238')) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '238' (type: string), 'val_238' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_238
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -227,21 +302,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/key=238/value=val_238 [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily
- filterExpr: ((key = '238') and (value = 'val_238')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: ((key = '238') and (value = 'val_238')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '238' (type: string), 'val_238' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238')
PREHOOK: type: QUERY
@@ -263,14 +331,55 @@ OPTIMIZED SQL: SELECT `key`
FROM `default`.`fact_daily`
WHERE `ds` = '1' AND `hr` = '4' AND `value` = '3'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily
+ filterExpr: ((ds = '1') and (hr = '4') and (value = '3')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (value = '3') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -317,21 +426,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily
- filterExpr: ((ds = '1') and (hr = '4') and (value = '3')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (value = '3') (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3")
PREHOOK: type: QUERY
@@ -351,14 +453,55 @@ OPTIMIZED SQL: SELECT CAST('495' AS STRING) AS `key`, `value`
FROM `default`.`fact_daily`
WHERE `ds` = '1' AND `hr` = '4' AND `key` = '495'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily
+ filterExpr: ((ds = '1') and (hr = '4') and (key = '495')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '495') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '495' (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -405,21 +548,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
+ Truncated Path -> Alias:
+ /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily
- filterExpr: ((ds = '1') and (hr = '4') and (key = '495')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '495') (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '495' (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369'
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/65f02d2f/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
index 6217adb..98ad365 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
@@ -84,14 +84,104 @@ OPTIMIZED SQL: SELECT `key`, CAST('val_484' AS STRING) AS `value`
FROM `default`.`fact_daily_n2`
WHERE `ds` = '1' AND `hr` = '4' AND `value` = 'val_484'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n2
+ filterExpr: ((ds = '1') and (hr = '4') and (value = 'val_484')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (value = 'val_484') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), 'val_484' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n2
+ numFiles 3
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct fact_daily_n2 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n2
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct fact_daily_n2 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily_n2
+ name: default.fact_daily_n2
+#### A masked pattern was here ####
Partition
+ base file name: value=val_484
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -138,21 +228,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n2
name: default.fact_daily_n2
+ Truncated Path -> Alias:
+ /fact_daily_n2/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n2]
+ /fact_daily_n2/ds=1/hr=4/key=484/value=val_484 [fact_daily_n2]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n2
- filterExpr: ((ds = '1') and (hr = '4') and (value = 'val_484')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (value = 'val_484') (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), 'val_484' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and value= 'val_484'
PREHOOK: type: QUERY
@@ -173,14 +257,55 @@ OPTIMIZED SQL: SELECT CAST('406' AS STRING) AS `key`
FROM `default`.`fact_daily_n2`
WHERE `ds` = '1' AND `hr` = '4' AND `key` = '406'
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n2
+ filterExpr: ((ds = '1') and (hr = '4') and (key = '406')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key = '406') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '406' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -227,21 +352,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n2
name: default.fact_daily_n2
+ Truncated Path -> Alias:
+ /fact_daily_n2/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n2]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n2
- filterExpr: ((ds = '1') and (hr = '4') and (key = '406')) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (key = '406') (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '406' (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and key= '406'
PREHOOK: type: QUERY
@@ -265,14 +383,55 @@ OPTIMIZED SQL: SELECT `key`, `value`
FROM `default`.`fact_daily_n2`
WHERE `ds` = '1' AND `hr` = '4' AND (`key` = '484' AND `value` = 'val_484' OR `key` = '238' AND `value` = 'val_238')
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Partition Description:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: fact_daily_n2
+ filterExpr: ((ds = '1') and (hr = '4') and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (((key = '238') and (value = 'val_238')) or ((key = '484') and (value = 'val_484'))) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
Partition
+ base file name: value=val_238
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -319,21 +478,64 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily_n2
name: default.fact_daily_n2
+#### A masked pattern was here ####
+ Partition
+ base file name: value=val_484
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 4
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n2
+ numFiles 3
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct fact_daily_n2 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.fact_daily_n2
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct fact_daily_n2 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.fact_daily_n2
+ name: default.fact_daily_n2
+ Truncated Path -> Alias:
+ /fact_daily_n2/ds=1/hr=4/key=238/value=val_238 [fact_daily_n2]
+ /fact_daily_n2/ds=1/hr=4/key=484/value=val_484 [fact_daily_n2]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
Processor Tree:
- TableScan
- alias: fact_daily_n2
- filterExpr: ((ds = '1') and (hr = '4') and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (((key = '238') and (value = 'val_238')) or ((key = '484') and (value = 'val_484'))) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ ListSink
PREHOOK: query: SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238'))
PREHOOK: type: QUERY