You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/10/13 01:53:54 UTC
[2/5] hive git commit: HIVE-12065 : FS stats collection may generate
incorrect stats for multi-insert query (Ashutosh Chauhan via Pengcheng Xiong)
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q
index e3992b8..3341df0 100644
--- a/ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q
+++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_multi_insert.q
@@ -1,5 +1,6 @@
set hive.exec.infer.bucket.sort=true;
set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.stats.dbclass=fs;
-- This tests inferring how data is bucketed/sorted from the operators in the reducer
-- and populating that information in partitions' metadata. In particular, those cases
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert.q b/ql/src/test/queries/clientpositive/multi_insert.q
index 5947985..1fdfa59 100644
--- a/ql/src/test/queries/clientpositive/multi_insert.q
+++ b/ql/src/test/queries/clientpositive/multi_insert.q
@@ -5,7 +5,7 @@ create table src_multi2 like src;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
-
+set hive.stats.dbclass=fs;
explain
from src
insert overwrite table src_multi1 select * where key < 10
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert_gby2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_gby2.q b/ql/src/test/queries/clientpositive/multi_insert_gby2.q
index 46e2b19..fa29261 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_gby2.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_gby2.q
@@ -1,7 +1,7 @@
--HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
create table e1 (count int);
create table e2 (percentile double);
-
+set hive.stats.dbclass=fs;
explain
FROM (select key, cast(key as double) as value from src order by key) a
INSERT OVERWRITE TABLE e1
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert_gby3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_gby3.q b/ql/src/test/queries/clientpositive/multi_insert_gby3.q
index 1221af4..d85ff9a 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_gby3.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_gby3.q
@@ -2,7 +2,7 @@
create table e1 (key string, keyD double);
create table e2 (key string, keyD double, value string);
create table e3 (key string, keyD double);
-
+set hive.stats.dbclass=fs;
explain
FROM (select key, cast(key as double) as keyD, value from src order by key) a
INSERT OVERWRITE TABLE e1
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q b/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q
index acf905f..d80717f 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_lateral_view.q
@@ -1,3 +1,4 @@
+set hive.stats.dbclass=fs;
-- SORT_QUERY_RESULTS
create table src_10 as select * from src limit 10;
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert_mixed.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_mixed.q b/ql/src/test/queries/clientpositive/multi_insert_mixed.q
index 6d91973..8fb577a 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_mixed.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_mixed.q
@@ -1,7 +1,7 @@
create table src_multi1 like src;
create table src_multi2 like src;
create table src_multi3 like src;
-
+set hive.stats.dbclass=fs;
-- Testing the case where a map work contains both shuffling (ReduceSinkOperator)
-- and inserting to output table (FileSinkOperator).
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q b/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q
index 3117713..3ddaa47 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q
@@ -1,5 +1,5 @@
set hive.multi.insert.move.tasks.share.dependencies=true;
-
+set hive.stats.dbclass=fs;
-- SORT_QUERY_RESULTS
create table src_multi1 like src;
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/queries/clientpositive/multi_insert_union_src.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_union_src.q b/ql/src/test/queries/clientpositive/multi_insert_union_src.q
index 088d756..f9b6f87 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_union_src.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_union_src.q
@@ -1,7 +1,7 @@
drop table if exists src2;
drop table if exists src_multi1;
drop table if exists src_multi1;
-
+set hive.stats.dbclass=fs;
CREATE TABLE src2 as SELECT * FROM src;
create table src_multi1 like src;
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
index 5803093..869d6cb 100644
--- a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
+++ b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
@@ -387,15 +387,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -405,10 +405,10 @@ STAGE PLANS:
0 key (type: string)
1 key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -506,15 +506,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((val = 3) and key is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -600,19 +600,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((key = 6) and val is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: val (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -709,32 +709,32 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Map 5
Map Operator Tree:
TableScan
alias: t3
- Statistics: Num rows: 1 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
value expressions: val (type: string)
Reducer 2
Reduce Operator Tree:
@@ -745,12 +745,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
Join Operator
@@ -760,10 +760,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 key (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/pcr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index fb08f10..b67a909 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -4390,8 +4390,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.pcr_t2
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 20
+ rawDataSize 160
serialization.ddl struct pcr_t2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -4429,8 +4429,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.pcr_t3
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 20
+ rawDataSize 160
serialization.ddl struct pcr_t3 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -4509,8 +4509,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.pcr_t2
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 20
+ rawDataSize 160
serialization.ddl struct pcr_t2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -4540,8 +4540,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.pcr_t3
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 20
+ rawDataSize 160
serialization.ddl struct pcr_t3 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
index 0cf9080..6bf5080 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
@@ -59,40 +59,40 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id1 is not null and id2 is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id1 (type: string), id2 (type: string)
sort order: ++
Map-reduce partition columns: id1 (type: string), id2 (type: string)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id is not null and (d <= 1)) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id (type: string), id (type: string)
sort order: ++
Map-reduce partition columns: id (type: string), id (type: string)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: d (type: int)
Map 5
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (d <= 1) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: d (type: int)
Reducer 2
Reduce Operator Tree:
@@ -103,10 +103,10 @@ STAGE PLANS:
0 id1 (type: string), id2 (type: string)
1 id (type: string), id (type: string)
outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int)
Reducer 3
Reduce Operator Tree:
@@ -117,14 +117,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col6, _col11
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -167,40 +167,40 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id1 is not null and id2 is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id1 (type: string), id2 (type: string)
sort order: ++
Map-reduce partition columns: id1 (type: string), id2 (type: string)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (id is not null and (d <= 1)) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: id (type: string), id (type: string)
sort order: ++
Map-reduce partition columns: id (type: string), id (type: string)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: d (type: int)
Map 5
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (d <= 1) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: d (type: int)
Reducer 2
Reduce Operator Tree:
@@ -211,10 +211,10 @@ STAGE PLANS:
0 id1 (type: string), id2 (type: string)
1 id (type: string), id (type: string)
outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int)
Reducer 3
Reduce Operator Tree:
@@ -225,17 +225,17 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col6, _col11
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((_col6 > 1) or (_col11 > 1)) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
index eeb23a8..eeb18b0 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
@@ -159,19 +159,19 @@ STAGE PLANS:
1 key (type: int)
outputColumnNames: _col0, _col7
Position of Big Table: 0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
Select Operator
expressions: _col0 (type: int), _col7 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=1/
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
index 7c34ceb..878b930 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
@@ -341,12 +341,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: UDFToDouble(value) is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 UDFToDouble(key) (type: double)
@@ -373,8 +373,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.test_table4
numFiles 16
- numRows 0
- rawDataSize 0
+ numRows 500
+ rawDataSize 5312
serialization.ddl struct test_table4 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -395,8 +395,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.test_table4
numFiles 16
- numRows 0
- rawDataSize 0
+ numRows 500
+ rawDataSize 5312
serialization.ddl struct test_table4 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -418,12 +418,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -434,15 +434,15 @@ STAGE PLANS:
input vertices:
1 Map 3
Position of Big Table: 0
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)
auto parallelism: false
@@ -467,8 +467,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.test_table3
numFiles 16
- numRows 0
- rawDataSize 0
+ numRows 500
+ rawDataSize 5312
serialization.ddl struct test_table3 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -489,8 +489,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.test_table3
numFiles 16
- numRows 0
- rawDataSize 0
+ numRows 500
+ rawDataSize 5312
serialization.ddl struct test_table3 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -507,16 +507,16 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
index 4032170..06b6121 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
@@ -809,12 +809,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 key (type: int), value (type: string)
@@ -846,8 +846,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.test_table2
numFiles 16
- numRows 0
- rawDataSize 0
+ numRows 500
+ rawDataSize 7218
serialization.ddl struct test_table2 { i32 key, i32 key2, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -868,8 +868,8 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.test_table2
numFiles 16
- numRows 0
- rawDataSize 0
+ numRows 500
+ rawDataSize 7218
serialization.ddl struct test_table2 { i32 key, i32 key2, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out
index db737b3..9ce40bd 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out
@@ -67,7 +67,7 @@ STAGE PLANS:
keys:
0 key (type: int)
1 key (type: int)
- Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
http://git-wip-us.apache.org/repos/asf/hive/blob/9b4826e7/ql/src/test/results/clientpositive/spark/union34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union34.q.out b/ql/src/test/results/clientpositive/spark/union34.q.out
index a9edf14..0d35488 100644
--- a/ql/src/test/results/clientpositive/spark/union34.q.out
+++ b/ql/src/test/results/clientpositive/spark/union34.q.out
@@ -92,14 +92,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src10_2
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
@@ -117,14 +117,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src10_1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -134,11 +134,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
input vertices:
1 Map 3
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Local Work:
Map Reduce Local Work
@@ -146,39 +146,39 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src10_3
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Map 5
Map Operator Tree:
TableScan
alias: src10_4
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -280,64 +280,64 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src10_1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Map 4
Map Operator Tree:
TableScan
alias: src10_2
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Map 5
Map Operator Tree:
TableScan
alias: src10_3
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Map 6
Map Operator Tree:
TableScan
alias: src10_4
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
@@ -345,21 +345,21 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 353 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat