You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2019/03/09 02:53:29 UTC
[hive] branch master updated: HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository.
prasanthj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new e7f7fe3 HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)
e7f7fe3 is described below
commit e7f7fe3b1cf443823a05e5409f55c55475fb5b48
Author: Prasanth Jayachandran <pr...@apache.org>
AuthorDate: Fri Mar 8 18:52:15 2019 -0800
HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++--
.../results/clientpositive/groupby_position.q.out | 24 +++++++++++-----------
.../perf/tez/constraints/query94.q.out | 4 ++--
.../perf/tez/constraints/query95.q.out | 4 ++--
.../results/clientpositive/perf/tez/query94.q.out | 4 ++--
.../results/clientpositive/perf/tez/query95.q.out | 4 ++--
6 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0dea099..076035b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1741,14 +1741,14 @@ public class HiveConf extends Configuration {
"How many rows with the same key value should be cached in memory per smb joined table."),
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000,
"Number of rows after which size of the grouping keys/aggregation classes is performed"),
- HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.99,
+ HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5,
"Portion of total memory to be used by map-side group aggregation hash table"),
HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3,
"Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"),
HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9,
"The max memory to be used by map-side group aggregation hash table.\n" +
"If the memory usage is higher than this number, force to flush data"),
- HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5,
+ HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.99,
"Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" +
"Set to 1 to make sure hash aggregation is never turned off."),
HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out
index 296279a..7305df0 100644
--- a/ql/src/test/results/clientpositive/groupby_position.q.out
+++ b/ql/src/test/results/clientpositive/groupby_position.q.out
@@ -69,7 +69,7 @@ STAGE PLANS:
keys: key (type: string), value (type: string), substr(value, 5) (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -168,7 +168,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -176,14 +176,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -192,7 +192,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, val1, val2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
mode: hash
@@ -354,7 +354,7 @@ STAGE PLANS:
keys: value (type: string), key (type: string), substr(value, 5) (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -453,7 +453,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -461,14 +461,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), CAST( _col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -477,7 +477,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, val1, val2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
mode: hash
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out
index 1dba4fb..ab688b2 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out
@@ -103,12 +103,12 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_160]
Group By Operator [GBY_159] (rows=1 width=232)
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
- Group By Operator [GBY_158] (rows=2511437 width=228)
+ Group By Operator [GBY_158] (rows=5022875 width=228)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_73]
PartitionCols:_col0
- Group By Operator [GBY_72] (rows=2511437 width=228)
+ Group By Operator [GBY_72] (rows=5022875 width=228)
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
Select Operator [SEL_41] (rows=5022875 width=229)
Output:["_col4","_col5","_col6"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out
index 523fa2c..420cd78 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out
@@ -117,12 +117,12 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_278]
Group By Operator [GBY_277] (rows=1 width=232)
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
- Group By Operator [GBY_276] (rows=2511437 width=228)
+ Group By Operator [GBY_276] (rows=5022875 width=228)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_109]
PartitionCols:_col0
- Group By Operator [GBY_108] (rows=2511437 width=228)
+ Group By Operator [GBY_108] (rows=5022875 width=228)
Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3
Merge Join Operator [MERGEJOIN_235] (rows=5022875 width=227)
Conds:RS_55._col3=RS_275._col0(Inner),Output:["_col3","_col4","_col5"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
index e6ac653..fdd2fd0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
@@ -103,12 +103,12 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_162]
Group By Operator [GBY_161] (rows=1 width=232)
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
- Group By Operator [GBY_160] (rows=2511437 width=228)
+ Group By Operator [GBY_160] (rows=5022875 width=228)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_74]
PartitionCols:_col0
- Group By Operator [GBY_73] (rows=2511437 width=228)
+ Group By Operator [GBY_73] (rows=5022875 width=228)
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
Select Operator [SEL_42] (rows=5022875 width=229)
Output:["_col4","_col5","_col6"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out
index da131d6..0a8c9a9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out
@@ -117,12 +117,12 @@ Stage-0
PARTITION_ONLY_SHUFFLE [RS_286]
Group By Operator [GBY_285] (rows=1 width=232)
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
- Group By Operator [GBY_284] (rows=2511437 width=228)
+ Group By Operator [GBY_284] (rows=5022875 width=228)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_115]
PartitionCols:_col0
- Group By Operator [GBY_114] (rows=2511437 width=228)
+ Group By Operator [GBY_114] (rows=5022875 width=228)
Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3
Merge Join Operator [MERGEJOIN_241] (rows=5022875 width=227)
Conds:RS_61._col3=RS_283._col0(Inner),Output:["_col3","_col4","_col5"]