You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/29 21:32:12 UTC
svn commit: r1628269 - in /hive/branches/branch-0.14/ql/src/test:
queries/clientpositive/dynamic_partition_pruning_2.q
results/clientpositive/tez/dynamic_partition_pruning_2.q.out
Author: gunther
Date: Mon Sep 29 19:32:12 2014
New Revision: 1628269
URL: http://svn.apache.org/r1628269
Log:
HIVE-8294: Dynamic partition pruning fails with IndexOutOfBoundsException (Gunther Hagleitner, reviewed by Prasanth J)
Modified:
hive/branches/branch-0.14/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q
hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
Modified: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q?rev=1628269&r1=1628268&r2=1628269&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q (original)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q Mon Sep 29 19:32:12 2014
@@ -19,6 +19,9 @@ load data local inpath '../../data/files
load data local inpath '../../data/files/agg_01-p2.txt' into table agg_01 partition (dim_shops_id=2);
load data local inpath '../../data/files/agg_01-p3.txt' into table agg_01 partition (dim_shops_id=3);
+analyze table dim_shops compute statistics;
+analyze table agg_01 partition (dim_shops_id) compute statistics;
+
select * from dim_shops;
select * from agg_01;
@@ -40,6 +43,73 @@ d1.label in ('foo', 'bar')
GROUP BY d1.label
ORDER BY d1.label;
+set hive.tez.dynamic.partition.pruning.max.event.size=1000000;
+set hive.tez.dynamic.partition.pruning.max.data.size=1;
+
+EXPLAIN SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label;
+
+SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label;
+
+EXPLAIN SELECT d1.label
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id;
+
+SELECT d1.label
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id;
+
+EXPLAIN SELECT agg.amount
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and agg.dim_shops_id = 1;
+
+SELECT agg.amount
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and agg.dim_shops_id = 1;
+
+set hive.tez.dynamic.partition.pruning.max.event.size=1;
+set hive.tez.dynamic.partition.pruning.max.data.size=1000000;
+
+EXPLAIN SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label;
+
+SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label;
+
+set hive.tez.dynamic.partition.pruning.max.event.size=100000;
+set hive.tez.dynamic.partition.pruning.max.data.size=1000000;
+
EXPLAIN
SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'foo'
UNION ALL
@@ -47,4 +117,4 @@ SELECT amount FROM agg_01, dim_shops WHE
SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'foo'
UNION ALL
-SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'bar';
\ No newline at end of file
+SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'bar';
Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out?rev=1628269&r1=1628268&r2=1628269&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out Mon Sep 29 19:32:12 2014
@@ -67,6 +67,34 @@ POSTHOOK: query: load data local inpath
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@agg_01@dim_shops_id=3
+PREHOOK: query: analyze table dim_shops compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim_shops
+PREHOOK: Output: default@dim_shops
+POSTHOOK: query: analyze table dim_shops compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim_shops
+POSTHOOK: Output: default@dim_shops
+PREHOOK: query: analyze table agg_01 partition (dim_shops_id) compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agg_01
+PREHOOK: Input: default@agg_01@dim_shops_id=1
+PREHOOK: Input: default@agg_01@dim_shops_id=2
+PREHOOK: Input: default@agg_01@dim_shops_id=3
+PREHOOK: Output: default@agg_01
+PREHOOK: Output: default@agg_01@dim_shops_id=1
+PREHOOK: Output: default@agg_01@dim_shops_id=2
+PREHOOK: Output: default@agg_01@dim_shops_id=3
+POSTHOOK: query: analyze table agg_01 partition (dim_shops_id) compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agg_01
+POSTHOOK: Input: default@agg_01@dim_shops_id=1
+POSTHOOK: Input: default@agg_01@dim_shops_id=2
+POSTHOOK: Input: default@agg_01@dim_shops_id=3
+POSTHOOK: Output: default@agg_01
+POSTHOOK: Output: default@agg_01@dim_shops_id=1
+POSTHOOK: Output: default@agg_01@dim_shops_id=2
+POSTHOOK: Output: default@agg_01@dim_shops_id=3
PREHOOK: query: select * from dim_shops
PREHOOK: type: QUERY
PREHOOK: Input: default@dim_shops
@@ -137,29 +165,29 @@ STAGE PLANS:
TableScan
alias: d1
filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 0 Data size: 18 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id (type: int)
sort order: +
Map-reduce partition columns: id (type: int)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
value expressions: label (type: string)
Select Operator
expressions: id (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target Input: agg
Partition key expr: dim_shops_id
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Target column: dim_shops_id
Target Vertex: Map 2
Map 2
@@ -167,7 +195,7 @@ STAGE PLANS:
TableScan
alias: agg
filterExpr: dim_shops_id is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -180,25 +208,552 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col5, _col6
input vertices:
1 Map 1
- Statistics: Num rows: 0 Data size: 39 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col6 (type: string), _col0 (type: decimal(10,0))
outputColumnNames: _col6, _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(_col0)
keys: _col6 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agg_01
+PREHOOK: Input: default@agg_01@dim_shops_id=1
+PREHOOK: Input: default@agg_01@dim_shops_id=2
+PREHOOK: Input: default@agg_01@dim_shops_id=3
+PREHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agg_01
+POSTHOOK: Input: default@agg_01@dim_shops_id=1
+POSTHOOK: Input: default@agg_01@dim_shops_id=2
+POSTHOOK: Input: default@agg_01@dim_shops_id=3
+POSTHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+bar 3 15
+foo 3 6
+PREHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: int)
+ sort order: +
+ Map-reduce partition columns: id (type: int)
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ value expressions: label (type: string)
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: agg
+ filterExpr: dim_shops_id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {amount} {dim_shops_id}
+ 1 {id} {label}
+ keys:
+ 0 dim_shops_id (type: int)
+ 1 id (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ input vertices:
+ 1 Map 1
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string), _col0 (type: decimal(10,0))
+ outputColumnNames: _col6, _col0
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(_col0)
+ keys: _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agg_01
+PREHOOK: Input: default@agg_01@dim_shops_id=1
+PREHOOK: Input: default@agg_01@dim_shops_id=2
+PREHOOK: Input: default@agg_01@dim_shops_id=3
+PREHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agg_01
+POSTHOOK: Input: default@agg_01@dim_shops_id=1
+POSTHOOK: Input: default@agg_01@dim_shops_id=2
+POSTHOOK: Input: default@agg_01@dim_shops_id=3
+POSTHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+bar 3 15
+foo 3 6
+PREHOOK: query: EXPLAIN SELECT d1.label
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT d1.label
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ filterExpr: id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: int)
+ sort order: +
+ Map-reduce partition columns: id (type: int)
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: label (type: string)
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: agg
+ filterExpr: dim_shops_id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {dim_shops_id}
+ 1 {id} {label}
+ keys:
+ 0 dim_shops_id (type: int)
+ 1 id (type: int)
+ outputColumnNames: _col1, _col5, _col6
+ input vertices:
+ 1 Map 1
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 = _col5) (type: boolean)
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT d1.label
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agg_01
+PREHOOK: Input: default@agg_01@dim_shops_id=1
+PREHOOK: Input: default@agg_01@dim_shops_id=2
+PREHOOK: Input: default@agg_01@dim_shops_id=3
+PREHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d1.label
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agg_01
+POSTHOOK: Input: default@agg_01@dim_shops_id=1
+POSTHOOK: Input: default@agg_01@dim_shops_id=2
+POSTHOOK: Input: default@agg_01@dim_shops_id=3
+POSTHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+foo
+foo
+foo
+bar
+bar
+bar
+baz
+baz
+baz
+PREHOOK: query: EXPLAIN SELECT agg.amount
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and agg.dim_shops_id = 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT agg.amount
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and agg.dim_shops_id = 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ filterExpr: (id is not null and (id = 1)) (type: boolean)
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (id is not null and (id = 1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 1 (type: int)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: agg
+ filterExpr: (dim_shops_id is not null and (dim_shops_id = 1)) (type: boolean)
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {amount} {dim_shops_id}
+ 1
+ keys:
+ 0 dim_shops_id (type: int)
+ 1 1 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 1
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col1 = 1) and (_col1 = 1)) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: decimal(10,0))
+ outputColumnNames: _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT agg.amount
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and agg.dim_shops_id = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agg_01
+PREHOOK: Input: default@agg_01@dim_shops_id=1
+PREHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT agg.amount
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and agg.dim_shops_id = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agg_01
+POSTHOOK: Input: default@agg_01@dim_shops_id=1
+POSTHOOK: Input: default@dim_shops
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount)
+FROM agg_01 agg,
+dim_shops d1
+WHERE agg.dim_shops_id = d1.id
+and
+d1.label in ('foo', 'bar')
+GROUP BY d1.label
+ORDER BY d1.label
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: int)
+ sort order: +
+ Map-reduce partition columns: id (type: int)
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ value expressions: label (type: string)
+ Select Operator
+ expressions: id (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Dynamic Partitioning Event Operator
+ Target Input: agg
+ Partition key expr: dim_shops_id
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Target column: dim_shops_id
+ Target Vertex: Map 2
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: agg
+ filterExpr: dim_shops_id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {amount} {dim_shops_id}
+ 1 {id} {label}
+ keys:
+ 0 dim_shops_id (type: int)
+ 1 id (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ input vertices:
+ 1 Map 1
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: string), _col0 (type: decimal(10,0))
+ outputColumnNames: _col6, _col0
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(_col0)
+ keys: _col6 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Reducer 3
Reduce Operator Tree:
@@ -207,25 +762,25 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Reducer 4
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -296,28 +851,28 @@ STAGE PLANS:
TableScan
alias: dim_shops
filterExpr: (id is not null and (label = 'bar')) (type: boolean)
- Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id is not null and (label = 'bar')) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id (type: int)
sort order: +
Map-reduce partition columns: id (type: int)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target Input: agg_01
Partition key expr: dim_shops_id
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Target column: dim_shops_id
Target Vertex: Map 3
Map 2
@@ -325,28 +880,28 @@ STAGE PLANS:
TableScan
alias: dim_shops
filterExpr: (id is not null and (label = 'foo')) (type: boolean)
- Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id is not null and (label = 'foo')) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id (type: int)
sort order: +
Map-reduce partition columns: id (type: int)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target Input: agg_01
Partition key expr: dim_shops_id
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Target column: dim_shops_id
Target Vertex: Map 5
Map 3