You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/08/06 05:47:52 UTC
[7/9] hive git commit: HIVE-19097 : related equals and in operators
may cause inaccurate stats estimations (Zoltan Haindrich via Ashutosh
Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index ba004e9..edbe8bd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -6775,10 +6775,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour_n2
- filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index 54d5be0..aa4d888 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -275,7 +275,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterTimestampColumnInList(col 0:timestamp, values [0001-01-02 16:00:00.0, 0002-02-03 16:00:00.0])
predicate: (ts) IN (TIMESTAMP'0001-01-01 00:00:00', TIMESTAMP'0002-02-02 00:00:00') (type: boolean)
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ts (type: timestamp)
outputColumnNames: _col0
@@ -283,13 +283,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/mapjoin47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out
index cf29fa0..172d160 100644
--- a/ql/src/test/results/clientpositive/mapjoin47.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin47.q.out
@@ -707,7 +707,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3
residual filter predicates: {(struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102))}
- Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9375 Data size: 180600 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
index 01f8951..d352509 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
@@ -30627,17 +30627,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30856,19 +30856,19 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: cstring1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: false
@@ -30933,11 +30933,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -30966,7 +30966,7 @@ STAGE PLANS:
key expressions: _col1 (type: string)
null sort order: a
sort order: +
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
@@ -31003,13 +31003,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
index 919b712..68a58bd 100644
--- a/ql/src/test/results/clientpositive/pcr.q.out
+++ b/ql/src/test/results/clientpositive/pcr.q.out
@@ -1460,11 +1460,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-0
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT CAST(14 AS INTEGER) AS `key`, `value`
-FROM (SELECT `value`
-FROM `default`.`pcr_t1`
-WHERE (`ds` = '2000-04-08' OR `ds` = '2000-04-09') AND `key` = 14
-ORDER BY `value`) AS `t1`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1475,7 +1470,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: (((ds = '2000-04-08') or (ds = '2000-04-09')) and (key = 14)) (type: boolean)
+ filterExpr: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
@@ -1647,10 +1642,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`
-FROM `default`.`pcr_t1`
-WHERE `ds` = '2000-04-08' OR `ds` = '2000-04-09'
-ORDER BY `key`, `value`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1661,7 +1652,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: ((ds = '2000-04-08') or (ds = '2000-04-09')) (type: boolean)
+ filterExpr: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
@@ -2162,10 +2153,6 @@ PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='20
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, `ds`
-FROM `default`.`pcr_t1`
-WHERE `ds` = '2000-04-08' AND `key` = 1 OR `ds` = '2000-04-09' AND `key` = 2
-ORDER BY `key`, `value`, `ds`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -2176,22 +2163,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: pcr_t1
- filterExpr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+ filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (struct(ds)) IN (struct('2000-04-08'), struct('2000-04-09'))) (type: boolean)
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
tag: -1
auto parallelism: false
Execution mode: vectorized
@@ -2302,13 +2289,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -4919,11 +4906,6 @@ PREHOOK: query: explain extended select key, value, ds, hr from srcpart where ds
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr
POSTHOOK: type: QUERY
-OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr`
-FROM (SELECT `key`, `value`, `hr`
-FROM `default`.`srcpart`
-WHERE `ds` = '2008-04-08' AND (`hr` = '11' OR `hr` = '12') AND `key` = 11
-ORDER BY `key`, `hr`) AS `t1`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -4934,7 +4916,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ((ds = '2008-04-08') and ((hr = '11') or (hr = '12')) and (UDFToDouble(key) = 11.0D)) (type: boolean)
+ filterExpr: ((hr) IN ('11', '12') and (ds = '2008-04-08') and (UDFToDouble(key) = 11.0D)) (type: boolean)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index c8819cc..5cfc093 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -764,17 +764,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean)
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col6 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
index 45dfc53..b7faa9a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
@@ -202,7 +202,7 @@ STAGE PLANS:
Edges:
Reducer 12 <- Map 11 (GROUP, 169)
Reducer 15 <- Map 14 (GROUP, 336)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 7 (PARTITION-LEVEL SORT, 697)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855)
Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
Reducer 5 <- Reducer 4 (GROUP, 1009)
@@ -309,16 +309,16 @@ STAGE PLANS:
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 8
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query12.q.out b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
index ad7e912..413930c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query12.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
@@ -96,7 +96,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 171), Map 7 (PARTITION-LEVEL SORT, 171)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 174), Map 7 (PARTITION-LEVEL SORT, 174)
Reducer 3 <- Reducer 2 (GROUP, 186)
Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 93)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -142,16 +142,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out
index fb2a061..c9fcb88 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out
@@ -109,22 +109,22 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 8
+ Map 10
Map Operator Tree:
TableScan
- alias: household_demographics
- filterExpr: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ alias: store
+ filterExpr: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: hd_demo_sk (type: int), hd_dep_count (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
+ expressions: s_store_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col3 (type: int)
+ 0 _col4 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
Local Work:
@@ -134,23 +134,23 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 8
Map Operator Tree:
TableScan
- alias: store
- filterExpr: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ alias: household_demographics
+ filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: s_store_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ expressions: hd_demo_sk (type: int), hd_dep_count (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: int)
- 1 _col4 (type: int)
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -158,65 +158,33 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49)
- Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 3 (PARTITION-LEVEL SORT, 138)
- Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 17), Reducer 4 (PARTITION-LEVEL SORT, 17)
- Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 152), Reducer 2 (PARTITION-LEVEL SORT, 152)
+ Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 10
- Map Operator Tree:
- TableScan
- alias: customer_demographics
- filterExpr: (((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and ((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and cd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) (type: boolean)
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string), _col2 (type: string)
- Execution mode: vectorized
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: store_sales
- filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+ filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
- Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col4 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10
- input vertices:
- 0 Map 1
- Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2))
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
Execution mode: vectorized
- Local Work:
- Map Reduce Local Work
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: date_dim
@@ -235,26 +203,62 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Execution mode: vectorized
Map 9
Map Operator Tree:
TableScan
alias: customer_address
- filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_state (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
Reducer 3
Local Work:
Map Reduce Local Work
@@ -265,62 +269,54 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14
+ Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col3 (type: int)
+ 0 _col2 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col14
+ outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14, _col16
input vertices:
1 Map 8
- Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col4 (type: int)
- sort order: +
- Map-reduce partition columns: _col4 (type: int)
- Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: int)
+ Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) (type: boolean)
+ Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: int)
+ Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col4 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
Reducer 4
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col4 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10, _col14, _col16
- Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) (type: boolean)
- Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col14 (type: int)
- Reducer 5
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col2 (type: int)
+ 0 _col3 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col6, _col7, _col8, _col9, _col14, _col19, _col20
- Statistics: Num rows: 17178642 Data size: 1515504822 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col4, _col5, _col7, _col8, _col9, _col18
+ Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) (type: boolean)
- Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col6 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2))
- outputColumnNames: _col6, _col8, _col9
- Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) (type: boolean)
+ Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col5, _col7, _col8
+ input vertices:
+ 1 Map 10
+ Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col6), count(_col6), sum(_col8), count(_col8), sum(_col9), count(_col9)
+ aggregations: sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col8), count(_col8)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
@@ -328,7 +324,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint)
- Reducer 6
+ Reducer 5
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out
index 3d6fbda..67684f6 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query15.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out
@@ -157,7 +157,7 @@ STAGE PLANS:
outputColumnNames: _col3, _col4, _col7
Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
+ predicate: ((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col4 (type: string), _col7 (type: decimal(7,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
index 2f51a71..b5adc85 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
@@ -77,11 +77,11 @@ STAGE PLANS:
Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean)
- Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cc_call_center_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col2 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out
index 23f1e85..35405a7 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out
@@ -158,16 +158,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 13
Map Operator Tree:
@@ -197,16 +197,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 7
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query18.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
index f8bec59..3ebd215 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
@@ -75,9 +75,9 @@ STAGE PLANS:
Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306)
Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 337), Reducer 10 (PARTITION-LEVEL SORT, 337)
Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 374), Reducer 11 (PARTITION-LEVEL SORT, 374)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 428), Map 7 (PARTITION-LEVEL SORT, 428)
- Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 302), Reducer 2 (PARTITION-LEVEL SORT, 302)
- Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 731), Reducer 3 (PARTITION-LEVEL SORT, 731)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855)
+ Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+ Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
Reducer 5 <- Reducer 4 (GROUP, 1009)
Reducer 6 <- Reducer 5 (SORT, 1)
#### A masked pattern was here ####
@@ -90,16 +90,16 @@ STAGE PLANS:
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean)
- Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int)
outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int)
Execution mode: vectorized
Map 13
@@ -169,16 +169,16 @@ STAGE PLANS:
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
Execution mode: vectorized
Map 8
@@ -281,12 +281,12 @@ STAGE PLANS:
0 _col2 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8
- Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string)
Reducer 3
Reduce Operator Tree:
@@ -297,12 +297,12 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col4, _col6, _col7, _col8
- Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string)
Reducer 4
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query20.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query20.q.out b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
index 76fae0b..86e0e72 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query20.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 338), Map 7 (PARTITION-LEVEL SORT, 338)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 341), Map 7 (PARTITION-LEVEL SORT, 341)
Reducer 3 <- Reducer 2 (GROUP, 369)
Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 185)
Reducer 5 <- Reducer 4 (SORT, 1)
@@ -134,16 +134,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out
index 08b0f93..4ccc2df 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out
@@ -150,16 +150,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 19
Map Operator Tree:
@@ -208,16 +208,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 26
Map Operator Tree:
@@ -450,16 +450,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 48
Map Operator Tree:
@@ -508,16 +508,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 55
Map Operator Tree:
@@ -755,16 +755,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_date (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
Map 12
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
index e7ed297..294222e 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
@@ -60,11 +60,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_state (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col3 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
index b070fc0..a734710 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
@@ -258,16 +258,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 7
Map Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out
index b40081e..bb2796b 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out
@@ -72,10 +72,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: household_demographics
- filterExpr: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ filterExpr: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
+ predicate: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hd_demo_sk (type: int)
@@ -95,11 +95,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col3 (type: int)
@@ -165,16 +165,16 @@ STAGE PLANS:
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query36.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query36.q.out b/ql/src/test/results/clientpositive/perf/spark/query36.q.out
index d3bea76..694e579 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query36.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query36.q.out
@@ -74,11 +74,11 @@ STAGE PLANS:
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col2 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query37.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out
index 17c85a6..bce0d68 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out
@@ -96,16 +96,16 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) (type: boolean)
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2))
Execution mode: vectorized
Map 6
http://git-wip-us.apache.org/repos/asf/hive/blob/20c95c1c/ql/src/test/results/clientpositive/perf/spark/query45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out
index d61f8b8..cac3d05 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out
@@ -57,11 +57,11 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_id (type: string)
outputColumnNames: i_item_id
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), count(i_item_id)
mode: hash
@@ -90,13 +90,13 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 11 <- Map 10 (GROUP, 3)
+ Reducer 11 <- Map 10 (GROUP, 6)
Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 14 (PARTITION-LEVEL SORT, 154)
Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855)
Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 777), Reducer 9 (PARTITION-LEVEL SORT, 777)
Reducer 4 <- Reducer 3 (GROUP, 230)
Reducer 5 <- Reducer 4 (SORT, 1)
- Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 7), Reducer 11 (PARTITION-LEVEL SORT, 7)
+ Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 8), Reducer 11 (PARTITION-LEVEL SORT, 8)
Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 174), Reducer 8 (PARTITION-LEVEL SORT, 174)
#### A masked pattern was here ####
Vertices:
@@ -128,21 +128,21 @@ STAGE PLANS:
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_id (type: string)
outputColumnNames: i_item_id
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: i_item_id (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 12
Map Operator Tree:
@@ -230,16 +230,16 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), true (type: boolean)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean)
Reducer 13
Reduce Operator Tree: