You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/08 06:00:05 UTC
[02/14] hive git commit: HIVE-18079 : Statistics: Allow HyperLogLog
to be merged to the lowest-common-denominator bit-size (Gopal V via Prasanth
J)
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
index 68aabb0..4a10953 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
@@ -193,48 +193,48 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -242,28 +242,28 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
+ value expressions: _col2 (type: int)
Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
+ expressions: _col0 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -309,48 +309,48 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -358,28 +358,28 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
+ value expressions: _col2 (type: int)
Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
+ expressions: _col0 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1906,48 +1906,48 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -1955,28 +1955,28 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
+ value expressions: _col2 (type: int)
Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
+ expressions: _col0 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2023,48 +2023,48 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -2072,28 +2072,28 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
+ value expressions: _col2 (type: int)
Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col3
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col3 (type: int)
+ expressions: _col0 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
index b1363f0..ddd6bd1 100644
--- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
@@ -488,26 +488,25 @@ JOIN src y ON (x.key = y.key)
JOIN src1 z ON (x.key = z.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-3 depends on stages: Stage-4
+ Stage-3 is a root stage
Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Spark
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: z
+ alias: x
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -517,7 +516,8 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- Position of Big Table: 0
+ 2 _col0 (type: string)
+ Position of Big Table: 2
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -573,42 +573,27 @@ STAGE PLANS:
name: default.src1
name: default.src1
Truncated Path -> Alias:
- /src1 [$hdt$_3:z]
-
- Stage: Stage-3
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /src1 [$hdt$_2:x]
+ Map 2
Map Operator Tree:
TableScan
- alias: x
+ alias: z
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key is not null and value is not null) (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
+ Spark HashTable Sink Operator
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 2
- Position of Big Table: 0
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 1
+ 2 _col0 (type: string)
+ Position of Big Table: 2
Execution mode: vectorized
Local Work:
Map Reduce Local Work
@@ -664,24 +649,24 @@ STAGE PLANS:
name: default.src1
name: default.src1
Truncated Path -> Alias:
- /src1 [$hdt$_2:x]
+ /src1 [$hdt$_3:z]
Map 4
Map Operator Tree:
TableScan
- alias: y
+ alias: w
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: value (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
Position of Big Table: 0
Execution mode: vectorized
@@ -739,7 +724,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [$hdt$_0:y]
+ /src [$hdt$_0:w]
Stage: Stage-1
Spark
@@ -748,49 +733,52 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: w
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
+ Inner Join 0 to 2
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col3
+ 2 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col5
input vertices:
0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ 1 Map 2
+ Position of Big Table: 2
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col3, _col6
+ outputColumnNames: _col0, _col3, _col5
input vertices:
1 Map 4
Position of Big Table: 0
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col3 (type: string), _col6 (type: string)
+ expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -873,7 +861,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- /src [$hdt$_1:w]
+ /src [$hdt$_1:y]
Stage: Stage-0
Move Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
index ec632a6..fff2f31 100644
--- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
@@ -566,18 +566,18 @@ STAGE PLANS:
Map 5
Map Operator Tree:
TableScan
- alias: src1
+ alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
Execution mode: vectorized
Local Work:
@@ -603,22 +603,22 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col2 (type: string)
+ value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
- alias: src
+ alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -635,15 +635,15 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2
input vertices:
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
index 103491d..85d0b8a 100644
--- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -1665,9 +1665,9 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_19]
- Select Operator [SEL_18] (rows=366 width=178)
+ Select Operator [SEL_18] (rows=365 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_17] (rows=366 width=179)
+ Filter Operator [FIL_17] (rows=365 width=179)
predicate:_col3 is null
Join Operator [JOIN_16] (rows=500 width=179)
Output:["_col0","_col1","_col3"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"}
@@ -1730,9 +1730,9 @@ Stage-0
Stage-1
Reducer 3
File Output Operator [FS_18]
- Select Operator [SEL_17] (rows=183 width=178)
+ Select Operator [SEL_17] (rows=185 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=183 width=179)
+ Filter Operator [FIL_16] (rows=185 width=179)
predicate:_col4 is null
Join Operator [JOIN_15] (rows=250 width=179)
Output:["_col0","_col1","_col4"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"}
@@ -1806,7 +1806,7 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_12]
- Join Operator [JOIN_10] (rows=133 width=178)
+ Join Operator [JOIN_10] (rows=131 width=178)
Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_8]
@@ -1858,7 +1858,7 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_12]
- Join Operator [JOIN_10] (rows=133 width=178)
+ Join Operator [JOIN_10] (rows=131 width=178)
Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_8]
@@ -1900,7 +1900,7 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_12]
- Join Operator [JOIN_10] (rows=133 width=178)
+ Join Operator [JOIN_10] (rows=131 width=178)
Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_8]
@@ -2021,16 +2021,16 @@ Stage-0
<-Reducer 3 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_27]
PartitionCols:_col2
- Filter Operator [FIL_15] (rows=66 width=186)
+ Filter Operator [FIL_15] (rows=65 width=186)
predicate:_col2 is not null
- Group By Operator [GBY_14] (rows=66 width=186)
+ Group By Operator [GBY_14] (rows=65 width=186)
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
<-Reducer 2 [GROUP]
GROUP [RS_13]
PartitionCols:_col0, _col1
- Group By Operator [GBY_12] (rows=66 width=186)
+ Group By Operator [GBY_12] (rows=65 width=186)
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
- Join Operator [JOIN_10] (rows=133 width=178)
+ Join Operator [JOIN_10] (rows=131 width=178)
Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_8]
@@ -2634,7 +2634,7 @@ Stage-0
PartitionCols:_col0
Group By Operator [GBY_10] (rows=16 width=94)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
- Join Operator [JOIN_8] (rows=40 width=86)
+ Join Operator [JOIN_8] (rows=39 width=86)
Output:["_col0"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_6]
@@ -2695,7 +2695,7 @@ Stage-0
PartitionCols:_col0
Group By Operator [GBY_10] (rows=16 width=94)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
- Join Operator [JOIN_8] (rows=40 width=86)
+ Join Operator [JOIN_8] (rows=39 width=86)
Output:["_col0"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_6]
@@ -2755,7 +2755,7 @@ Stage-0
PartitionCols:_col0
Group By Operator [GBY_10] (rows=16 width=94)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
- Map Join Operator [MAPJOIN_22] (rows=40 width=86)
+ Map Join Operator [MAPJOIN_22] (rows=39 width=86)
Conds:SEL_5._col0=SEL_5._col0(Inner),Output:["_col0"]
<-Select Operator [SEL_5] (rows=500 width=87)
Output:["_col0"]
@@ -2807,16 +2807,16 @@ Stage-0
GROUP [RS_18]
Group By Operator [GBY_17] (rows=1 width=16)
Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"]
- Select Operator [SEL_15] (rows=10 width=94)
+ Select Operator [SEL_15] (rows=9 width=94)
Output:["_col0","_col1"]
- Group By Operator [GBY_14] (rows=10 width=94)
+ Group By Operator [GBY_14] (rows=9 width=94)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Reducer 2 [GROUP]
GROUP [RS_13]
PartitionCols:_col0
- Group By Operator [GBY_12] (rows=10 width=94)
+ Group By Operator [GBY_12] (rows=9 width=94)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
- Join Operator [JOIN_10] (rows=20 width=86)
+ Join Operator [JOIN_10] (rows=19 width=86)
Output:["_col0"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_8]
@@ -5215,9 +5215,9 @@ Stage-2
Reducer 2
File Output Operator [FS_11]
table:{"name:":"default.dest_j1_n14"}
- Select Operator [SEL_9] (rows=809 width=95)
+ Select Operator [SEL_9] (rows=791 width=95)
Output:["_col0","_col1"]
- Join Operator [JOIN_8] (rows=809 width=178)
+ Join Operator [JOIN_8] (rows=791 width=178)
Output:["_col0","_col2"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_6]
@@ -5241,7 +5241,7 @@ Stage-2
Map 4
File Output Operator [FS_11]
table:{"name:":"default.dest_j1_n14"}
- Select Operator [SEL_9] (rows=809 width=95)
+ Select Operator [SEL_9] (rows=791 width=95)
Output:["_col0","_col1"]
Map Join Operator [MAPJOIN_16]
Conds:TS_14.reducesinkkey0=TS_14.reducesinkkey0(Inner),Output:["_col0","_col2"]
@@ -5496,7 +5496,7 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_10]
- Join Operator [JOIN_8] (rows=809 width=356)
+ Join Operator [JOIN_8] (rows=791 width=356)
Output:["_col0","_col1","_col2","_col3"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
<-Map 1 [PARTITION-LEVEL SORT]
PARTITION-LEVEL SORT [RS_6]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
index 070fca7..c253fd2 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
@@ -251,7 +251,7 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_5]
- Group By Operator [GBY_3] (rows=309/309 width=95)
+ Group By Operator [GBY_3] (rows=316/309 width=95)
Output:["_col0","_col1"],aggregations:["count(KEY._col0)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_2]
@@ -298,7 +298,7 @@ Stage-0
Output:["_col0"],aggregations:["count()"]
<-Reducer 2 [CUSTOM_SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_10]
- Merge Join Operator [MERGEJOIN_18] (rows=267/0 width=8)
+ Merge Join Operator [MERGEJOIN_18] (rows=262/0 width=8)
Conds:RS_6._col0=RS_7._col0(Inner)
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_6]
@@ -347,7 +347,7 @@ Stage-0
Output:["_col0"],aggregations:["count()"]
<-Reducer 2 [CUSTOM_SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_10]
- Merge Join Operator [MERGEJOIN_18] (rows=267/1019 width=8)
+ Merge Join Operator [MERGEJOIN_18] (rows=262/1019 width=8)
Conds:RS_6._col0=RS_7._col0(Inner)
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_6]
@@ -451,9 +451,9 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_10]
- Select Operator [SEL_9] (rows=809/1028 width=178)
+ Select Operator [SEL_9] (rows=791/1028 width=178)
Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_15] (rows=809/1028 width=178)
+ Merge Join Operator [MERGEJOIN_15] (rows=791/1028 width=178)
Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_6]
@@ -513,9 +513,9 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_9]
- Transform Operator [SCR_8] (rows=809/1028 width=178)
+ Transform Operator [SCR_8] (rows=791/1028 width=178)
command:cat
- Merge Join Operator [MERGEJOIN_14] (rows=809/1028 width=178)
+ Merge Join Operator [MERGEJOIN_14] (rows=791/1028 width=178)
Conds:RS_3.key=RS_5.key(Inner),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_3]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index bccfa04..42bad01 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -847,9 +847,9 @@ Stage-0
Stage-1
Map 2 vectorized
File Output Operator [FS_34]
- Select Operator [SEL_33] (rows=391/480 width=186)
+ Select Operator [SEL_33] (rows=399/480 width=186)
Output:["_col0","_col1","_col2"]
- Map Join Operator [MAPJOIN_32] (rows=391/480 width=186)
+ Map Join Operator [MAPJOIN_32] (rows=399/480 width=186)
BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
<-Map 1 [CUSTOM_EDGE] vectorized
MULTICAST [RS_29]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
index 5c17512..9d14557 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
@@ -44,11 +44,11 @@ Stage-0
Stage-1
Reducer 3
File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=2076/10 width=553)
+ Select Operator [SEL_11] (rows=2048/10 width=552)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_10]
- Merge Join Operator [MERGEJOIN_17] (rows=2076/10 width=553)
+ Merge Join Operator [MERGEJOIN_17] (rows=2048/10 width=552)
Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_6]
@@ -143,7 +143,7 @@ Stage-0
Output:["_col0"],aggregations:["count()"]
<-Reducer 2 [CUSTOM_SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_10]
- Merge Join Operator [MERGEJOIN_18] (rows=2076/10 width=8)
+ Merge Join Operator [MERGEJOIN_18] (rows=2048/10 width=8)
Conds:RS_6._col0=RS_7._col0(Inner)
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_6]
@@ -232,16 +232,16 @@ Stage-0
Stage-1
Reducer 4
File Output Operator [FS_15]
- Select Operator [SEL_14] (rows=623/5 width=11)
+ Select Operator [SEL_14] (rows=631/5 width=11)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_13]
- Group By Operator [GBY_11] (rows=623/5 width=11)
+ Group By Operator [GBY_11] (rows=631/5 width=11)
Output:["_col0","_col1"],aggregations:["count()"],keys:KEY._col0
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_10]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_20] (rows=2076/10 width=3)
+ Merge Join Operator [MERGEJOIN_20] (rows=2048/10 width=3)
Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_6]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index fd71c0c..75f29fa 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -114,9 +114,9 @@ Stage-3
<-Reducer 4 [CUSTOM_SIMPLE_EDGE]
File Output Operator [FS_19]
table:{"name:":"default.src_multi2_n7"}
- Select Operator [SEL_18] (rows=849/508 width=178)
+ Select Operator [SEL_18] (rows=830/508 width=178)
Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_26] (rows=849/508 width=178)
+ Merge Join Operator [MERGEJOIN_26] (rows=830/508 width=178)
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col3"]
<-Map 7 [SIMPLE_EDGE]
SHUFFLE [RS_16]
@@ -154,7 +154,7 @@ Stage-3
TableScan [TS_3] (rows=25/25 width=175)
Output:["key","value"]
PARTITION_ONLY_SHUFFLE [RS_2]
- Select Operator [SEL_1] (rows=849/508 width=178)
+ Select Operator [SEL_1] (rows=830/508 width=178)
Output:["key","value"]
Please refer to the previous Select Operator [SEL_18]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index 8b7b11d..d993905 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -675,9 +675,9 @@ Stage-0
Stage-1
Map 2 vectorized
File Output Operator [FS_34]
- Select Operator [SEL_33] (rows=391 width=186)
+ Select Operator [SEL_33] (rows=399 width=186)
Output:["_col0","_col1","_col2"]
- Map Join Operator [MAPJOIN_32] (rows=391 width=186)
+ Map Join Operator [MAPJOIN_32] (rows=399 width=186)
BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
<-Map 1 [CUSTOM_EDGE] vectorized
MULTICAST [RS_29]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
index 5b9149c..910a812 100644
--- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24737 Data size: 197896 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -175,7 +175,7 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24737 Data size: 197896 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -293,7 +293,7 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 18702 Data size: 149616 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 18464 Data size: 147712 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -408,7 +408,7 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 18702 Data size: 149616 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 18464 Data size: 147712 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -521,7 +521,7 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24737 Data size: 197896 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -630,7 +630,7 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 25057 Data size: 200456 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24737 Data size: 197896 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
index 3bacb4a..a3a77f9 100644
--- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
input vertices:
0 Map 1
2 Map 4
- Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -200,7 +200,7 @@ STAGE PLANS:
input vertices:
0 Map 1
2 Map 4
- Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -343,7 +343,7 @@ STAGE PLANS:
0 Map 1
2 Map 4
3 Map 5
- Statistics: Num rows: 1694 Data size: 13552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1584 Data size: 12672 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -492,7 +492,7 @@ STAGE PLANS:
0 Map 1
2 Map 4
3 Map 5
- Statistics: Num rows: 1694 Data size: 13552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1584 Data size: 12672 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -671,7 +671,7 @@ STAGE PLANS:
input vertices:
0 Map 1
2 Map 6
- Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -729,7 +729,7 @@ STAGE PLANS:
input vertices:
0 Map 7
2 Map 10
- Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 265 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -915,7 +915,7 @@ STAGE PLANS:
input vertices:
0 Map 1
2 Map 6
- Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -974,7 +974,7 @@ STAGE PLANS:
input vertices:
0 Map 7
2 Map 10
- Statistics: Num rows: 261 Data size: 2088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 265 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -1157,7 +1157,7 @@ STAGE PLANS:
input vertices:
0 Map 1
2 Map 4
- Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 19 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -1169,7 +1169,7 @@ STAGE PLANS:
input vertices:
1 Map 5
2 Map 6
- Statistics: Num rows: 204 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -1347,7 +1347,7 @@ STAGE PLANS:
input vertices:
0 Map 1
2 Map 4
- Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 19 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Map Join Operator
condition map:
@@ -1360,7 +1360,7 @@ STAGE PLANS:
input vertices:
1 Map 5
2 Map 6
- Statistics: Num rows: 204 Data size: 1632 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/tez-tag.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez-tag.q.out b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
index 55ce485..cf96067 100644
--- a/ql/src/test/results/clientpositive/tez/tez-tag.q.out
+++ b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
@@ -190,7 +190,7 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_17]
Group By Operator [GBY_16] (rows=1 width=8)
Output:["_col0"],aggregations:["count()"]
- Merge Join Operator [MERGEJOIN_30] (rows=63 width=8)
+ Merge Join Operator [MERGEJOIN_30] (rows=64 width=8)
Conds:RS_12._col0=RS_13._col0(Inner)
<-Map 6 [SIMPLE_EDGE]
SHUFFLE [RS_13]
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
index 4e4dfb7..b630fa3 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
public class NumDistinctValueEstimatorFactory {
@@ -44,7 +45,7 @@ public class NumDistinctValueEstimatorFactory {
if (isFMSketch(buf)) {
return FMSketchUtils.deserializeFM(buf);
} else {
- return HyperLogLog.builder().build().deserialize(buf);
+ return HyperLogLogUtils.deserializeHLL(buf);
}
} catch (IOException e) {
throw new RuntimeException(e);
@@ -56,7 +57,7 @@ public class NumDistinctValueEstimatorFactory {
if (n instanceof FMSketch) {
return new FMSketch(((FMSketch) n).getNumBitVectors());
} else {
- return HyperLogLog.builder().build();
+ return HyperLogLog.builder().setSizeOptimized().build();
}
}
@@ -65,7 +66,7 @@ public class NumDistinctValueEstimatorFactory {
if ("fm".equals(func.toLowerCase())) {
return new FMSketch(numBitVectors);
} else if ("hll".equals(func.toLowerCase())) {
- return HyperLogLog.builder().build();
+ return HyperLogLog.builder().setSizeOptimized().build();
} else {
throw new RuntimeException("Can not recognize " + func);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
index 12897fc..422bfbe 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
@@ -62,6 +62,31 @@ public class HLLDenseRegister implements HLLRegister {
return set(registerIdx, (byte) lr);
}
+ // this is a lossy invert of the function above, which produces a hashcode
+ // which collides with the current winner of the register (we lose all higher
+ // bits, but we get all bits useful for lesser p-bit options)
+
+ // +-------------|-------------+
+ // |xxxx100000000|1000000000000| (lr=9 + idx=1024)
+ // +-------------|-------------+
+ // \
+ // +---------------|-----------+
+ // |xxxx10000000010|00000000000| (lr=2 + idx=0)
+ // +---------------|-----------+
+
+ // This shows the relevant bits of the original hash value
+ // and how the conversion is moving bits from the index value
+ // over to the leading zero computation
+
+ public void extractLowBitsTo(HLLRegister dest) {
+ for (int idx = 0; idx < register.length; idx++) {
+ byte lr = register[idx]; // this can be a max of 65, never > 127
+ if (lr != 0) {
+ dest.add((long) ((1 << (p + lr - 1)) | idx));
+ }
+ }
+ }
+
public boolean set(int idx, byte value) {
boolean updated = false;
if (idx < register.length && value > register[idx]) {
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
index d62b858..d5ac54a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.common.ndv.hll;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.TreeMap;
public class HLLSparseRegister implements HLLRegister {
@@ -187,6 +188,18 @@ public class HLLSparseRegister implements HLLRegister {
return sparseMap;
}
+ // this is effectively the same as the dense register impl.
+ public void extractLowBitsTo(HLLRegister dest) {
+ for (Entry<Integer, Byte> entry : getSparseMap().entrySet()) {
+ int idx = entry.getKey();
+ byte lr = entry.getValue(); // this can be a max of 65, never > 127
+ if (lr != 0) {
+ // should be a no-op for sparse
+ dest.add((long) ((1 << (p + lr - 1)) | idx));
+ }
+ }
+ }
+
public int getP() {
return p;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index a3cc989..91a6865 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -18,10 +18,8 @@
package org.apache.hadoop.hive.common.ndv.hll;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Map;
@@ -160,6 +158,13 @@ public class HyperLogLog implements NumDistinctValueEstimator {
return this;
}
+ public HyperLogLogBuilder setSizeOptimized() {
+ // allowing this to be increased via config breaks the merge impl
+ // p=10 = ~1kb per vector or smaller
+ this.numRegisterIndexBits = 10;
+ return this;
+ }
+
public HyperLogLogBuilder setEncoding(EncodingType enc) {
this.encoding = enc;
return this;
@@ -431,12 +436,23 @@ public class HyperLogLog implements NumDistinctValueEstimator {
* @throws IllegalArgumentException
*/
public void merge(HyperLogLog hll) {
- if (p != hll.p || chosenHashBits != hll.chosenHashBits) {
+ if (chosenHashBits != hll.chosenHashBits) {
throw new IllegalArgumentException(
"HyperLogLog cannot be merged as either p or hashbits are different. Current: "
+ toString() + " Provided: " + hll.toString());
}
+ if (p > hll.p) {
+ throw new IllegalArgumentException(
+ "HyperLogLog cannot merge a smaller p into a larger one : "
+ + toString() + " Provided: " + hll.toString());
+ }
+
+ if (p != hll.p) {
+ // invariant: p > hll.p
+ hll = hll.squash(p);
+ }
+
EncodingType otherEncoding = hll.getEncoding();
if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.SPARSE)) {
@@ -464,7 +480,37 @@ public class HyperLogLog implements NumDistinctValueEstimator {
}
/**
- * Converts sparse to dense hll register
+ * Reduces the accuracy of the HLL provided to a smaller size
+ * @param p0
+ * - new p size for the new HyperLogLog (smaller or no change)
+ * @return reduced (or same) HyperLogLog instance
+ */
+ public HyperLogLog squash(final int p0) {
+ if (p0 > p) {
+ throw new IllegalArgumentException(
+ "HyperLogLog cannot be squashed to be bigger. Current: "
+ + toString() + " Provided: " + p0);
+ }
+
+ if (p0 == p) {
+ return this;
+ }
+
+ final HyperLogLog hll = new HyperLogLogBuilder()
+ .setNumRegisterIndexBits(p0).setEncoding(EncodingType.DENSE)
+ .enableNoBias(noBias).build();
+ final HLLDenseRegister result = hll.denseRegister;
+
+ if (encoding == EncodingType.SPARSE) {
+ sparseRegister.extractLowBitsTo(result);
+ } else if (encoding == EncodingType.DENSE) {
+ denseRegister.extractLowBitsTo(result);
+ }
+ return hll;
+ }
+
+ /**
+ * Converts sparse to dense hll register.
* @param sparseRegister
* - sparse register to be converted
* @return converted dense register
@@ -576,14 +622,7 @@ public class HyperLogLog implements NumDistinctValueEstimator {
@Override
public NumDistinctValueEstimator deserialize(byte[] buf) {
- InputStream is = new ByteArrayInputStream(buf);
- try {
- HyperLogLog result = HyperLogLogUtils.deserializeHLL(is);
- is.close();
- return result;
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
+ return HyperLogLogUtils.deserializeHLL(buf);
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
index 4e6510b..aeba2e9 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.common.ndv.hll;
+import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
@@ -126,7 +127,7 @@ public class HyperLogLogUtils {
}
/**
- * Refer serializeHLL() for format of serialization. This funtions
+ * Refer serializeHLL() for format of serialization. This function
* deserializes the serialized hyperloglogs
* @param in
* - input stream
@@ -198,6 +199,22 @@ public class HyperLogLogUtils {
return result;
}
+ /**
+ * This function deserializes the serialized hyperloglogs from a byte array.
+ * @param buf - to deserialize
+ * @return HyperLogLog
+ */
+ public static HyperLogLog deserializeHLL(final byte[] buf) {
+ InputStream is = new ByteArrayInputStream(buf); // TODO: use faster non-sync inputstream
+ try {
+ HyperLogLog result = deserializeHLL(is);
+ is.close();
+ return result;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
private static void bitpackHLLRegister(OutputStream out, byte[] register, int bitWidth)
throws IOException {
int bitsLeft = 8;
http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
index 617d9c3..e014fb5 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
@@ -37,14 +37,18 @@ public class TestHyperLogLog {
HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
.setEncoding(EncodingType.DENSE).build();
+ HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+ .setEncoding(EncodingType.DENSE).build();
int size = 1000;
for (int i = 0; i < size; i++) {
hll.addLong(i);
hll2.addLong(size + i);
hll3.addLong(2 * size + i);
+ hll4.addLong(3 * size + i);
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
+ double delta4 = threshold * (4*size) / 100;
assertEquals((double) size, (double) hll.count(), delta);
assertEquals((double) size, (double) hll2.count(), delta);
@@ -63,8 +67,13 @@ public class TestHyperLogLog {
assertEquals((double) 3 * size, (double) hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
- // invalid merge -- register set size doesn't match
+ // valid merge -- register set size gets bigger (also 4k items total)
hll.merge(hll4);
+ assertEquals((double) 4 * size, (double) hll.count(), delta4);
+ assertEquals(EncodingType.DENSE, hll.getEncoding());
+
+ // invalid merge -- smaller register merge to bigger
+ hll.merge(hll5);
}
@Test(expected = IllegalArgumentException.class)
@@ -74,14 +83,18 @@ public class TestHyperLogLog {
HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
.setEncoding(EncodingType.SPARSE).build();
+ HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+ .setEncoding(EncodingType.SPARSE).build();
int size = 500;
for (int i = 0; i < size; i++) {
hll.addLong(i);
hll2.addLong(size + i);
hll3.addLong(2 * size + i);
+ hll4.addLong(3 * size + i);
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
+ double delta4 = threshold * (4*size) / 100;
assertEquals((double) size, (double) hll.count(), delta);
assertEquals((double) size, (double) hll2.count(), delta);
@@ -100,8 +113,13 @@ public class TestHyperLogLog {
assertEquals((double) 3 * size, (double) hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
- // invalid merge -- register set size doesn't match
+ // valid merge -- register set size gets bigger & dense automatically
hll.merge(hll4);
+ assertEquals((double) 4 * size, (double) hll.count(), delta4);
+ assertEquals(EncodingType.DENSE, hll.getEncoding());
+
+ // invalid merge -- smaller register merge to bigger
+ hll.merge(hll5);
}
@Test(expected = IllegalArgumentException.class)
@@ -111,11 +129,14 @@ public class TestHyperLogLog {
HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
.setEncoding(EncodingType.DENSE).build();
+ HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+ .setEncoding(EncodingType.DENSE).build();
int size = 1000;
for (int i = 0; i < size; i++) {
hll.addLong(i);
hll2.addLong(size + i);
hll3.addLong(2 * size + i);
+ hll4.addLong(3 * size + i);
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
@@ -137,8 +158,13 @@ public class TestHyperLogLog {
assertEquals((double) 3 * size, (double) hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
- // invalid merge -- register set size doesn't match
- hll.merge(hll4);
+ // merge should convert hll2 to DENSE
+ hll2.merge(hll4);
+ assertEquals((double) 2 * size, (double) hll2.count(), delta);
+ assertEquals(EncodingType.DENSE, hll2.getEncoding());
+
+ // invalid merge -- smaller register merge to bigger
+ hll.merge(hll5);
}
@Test(expected = IllegalArgumentException.class)
@@ -148,11 +174,14 @@ public class TestHyperLogLog {
HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
.setEncoding(EncodingType.SPARSE).build();
+ HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+ .setEncoding(EncodingType.SPARSE).build();
int size = 1000;
for (int i = 0; i < size; i++) {
hll.addLong(i);
hll2.addLong(size + i);
hll3.addLong(2 * size + i);
+ hll4.addLong(3 * size + i);
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
@@ -174,8 +203,14 @@ public class TestHyperLogLog {
assertEquals((double) 3 * size, (double) hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
- // invalid merge -- register set size doesn't match
- hll.merge(hll4);
+ // merge should convert hll3 to DENSE
+ hll3.merge(hll4);
+ assertEquals((double) 2 * size, (double) hll3.count(), delta);
+ assertEquals(EncodingType.DENSE, hll3.getEncoding());
+
+ // invalid merge -- smaller register merge to bigger
+ hll.merge(hll5);
+
}
@Test(expected = IllegalArgumentException.class)
@@ -185,11 +220,14 @@ public class TestHyperLogLog {
HyperLogLog hll3 = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
.setEncoding(EncodingType.SPARSE).build();
+ HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+ .setEncoding(EncodingType.SPARSE).build();
int size = 1000;
for (int i = 0; i < size; i++) {
hll.addLong(i);
hll2.addLong(size + i);
hll3.addLong(2 * size + i);
+ hll4.addLong(3 * size + i);
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
@@ -211,8 +249,13 @@ public class TestHyperLogLog {
assertEquals((double) 3 * size, (double) hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
- // invalid merge -- register set size doesn't match
- hll.merge(hll4);
+ // merge should convert hll2 to DENSE
+ hll2.merge(hll4);
+ assertEquals((double) 2 * size, (double) hll2.count(), delta);
+ assertEquals(EncodingType.DENSE, hll2.getEncoding());
+
+ // invalid merge -- a smaller register set cannot be merged into a bigger one
+ hll.merge(hll5);
}
@Test
@@ -227,4 +270,69 @@ public class TestHyperLogLog {
double delta = threshold * size / 100;
assertEquals((double) size, (double) hll.count(), delta);
}
+
+ @Test
+ public void testHLLSquash() {
+
+ int[] sizes = new int[] { 500, 1000, 2300, 4096};
+ int minBits = 9;
+ for (final int size : sizes) {
+
+ HyperLogLog hlls[] = new HyperLogLog[16];
+ for (int k = minBits; k < hlls.length; k++) {
+ final HyperLogLog hll = HyperLogLog.builder()
+ .setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(k).build();
+ for (int i = 0; i < size; i++) {
+ hll.addLong(i);
+ }
+ hlls[k] = hll;
+ }
+
+ for (int k = minBits; k < hlls.length; k++) {
+ for (int j = k + 1; j < hlls.length; j++) {
+ final HyperLogLog large = hlls[j];
+ final HyperLogLog small = hlls[k];
+ final HyperLogLog mush = large
+ .squash(small.getNumRegisterIndexBits());
+ assertEquals(small.count(), mush.count(), 0);
+ double delta = Math.ceil(small.getStandardError()*size);
+ assertEquals((double) size, (double) mush.count(), delta);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testHLLDenseDenseSquash() {
+ HyperLogLog p14HLL = HyperLogLog.builder().setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(14).build();
+ HyperLogLog p10HLL = HyperLogLog.builder().setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(10).build();
+ int size = 1_000_000;
+ for (int i = 0; i < size; i++) {
+ p14HLL.addLong(i);
+ }
+
+ for (int i = 0; i < 10_000; i++) {
+ p10HLL.addLong(i);
+ }
+
+ p14HLL.squash(p10HLL.getNumRegisterIndexBits());
+ assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+ }
+
+ @Test
+ public void testHLLSparseDenseSquash() {
+ HyperLogLog p14HLL = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).setNumRegisterIndexBits(14).build();
+ HyperLogLog p10HLL = HyperLogLog.builder().setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(10).build();
+ int size = 2000;
+ for (int i = 0; i < size; i++) {
+ p14HLL.addLong(i);
+ }
+
+ for (int i = 0; i < 10_000; i++) {
+ p10HLL.addLong(i);
+ }
+
+ p14HLL.squash(p10HLL.getNumRegisterIndexBits());
+ assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+ }
}