You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/03/26 19:22:40 UTC
[hive] 01/02: HIVE-21496: Automatic sizing of unordered buffer can overflow (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit d7ed17e27eda65dc0ea641887b6389bf0aec7e96
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Mar 25 12:59:14 2019 -0700
HIVE-21496: Automatic sizing of unordered buffer can overflow (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)
---
.../java/org/apache/hadoop/hive/ql/stats/StatsUtils.java | 5 ++---
.../clientpositive/llap/constraints_optimization.q.out | 6 +++---
.../clientpositive/llap/results_cache_temptable.q.out | 8 ++++----
.../clientpositive/perf/tez/constraints/query14.q.out | 14 +++++++-------
ql/src/test/results/clientpositive/perf/tez/query14.q.out | 14 +++++++-------
5 files changed, 23 insertions(+), 24 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 6149880..46048cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1967,10 +1967,7 @@ public class StatsUtils {
if (useColStats) {
List<ColStatistics> colStats = stats.getColumnStats();
for (ColStatistics cs : colStats) {
- long oldNumNulls = cs.getNumNulls();
long oldDV = cs.getCountDistint();
- long newNumNulls = Math.round(ratio * oldNumNulls);
- cs.setNumNulls(newNumNulls);
if (affectedColumns.contains(cs.getColumnName())) {
long newDV = oldDV;
@@ -1987,6 +1984,8 @@ public class StatsUtils {
if (oldDV > newNumRows) {
cs.setCountDistint(newNumRows);
}
+ long newNumNulls = Math.round(ratio * cs.getNumNulls());
+ cs.setNumNulls(newNumNulls > newNumRows ? newNumRows: newNumNulls);
}
stats.setColumnStats(colStats);
long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats);
diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
index fbdc702..afcf53f 100644
--- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
@@ -601,12 +601,12 @@ STAGE PLANS:
minReductionHashAggr: 0.0
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -617,7 +617,7 @@ STAGE PLANS:
keys: KEY._col0 (type: bigint), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint)
outputColumnNames: _col0
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out
index 4f1e3a7..d6eb82d 100644
--- a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out
+++ b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out
@@ -316,10 +316,10 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -330,10 +330,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out
index 1a3aefe..0f48872 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out
@@ -297,21 +297,21 @@ Stage-0
Stage-1
Reducer 9 vectorized
File Output Operator [FS_1350]
- Limit [LIM_1349] (rows=7 width=192)
+ Limit [LIM_1349] (rows=7 width=212)
Number of rows:100
- Select Operator [SEL_1348] (rows=7 width=192)
+ Select Operator [SEL_1348] (rows=7 width=212)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_1347]
- Select Operator [SEL_1346] (rows=7 width=192)
+ Select Operator [SEL_1346] (rows=7 width=212)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Group By Operator [GBY_1345] (rows=7 width=200)
+ Group By Operator [GBY_1345] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
<-Union 7 [SIMPLE_EDGE]
<-Reducer 16 [CONTAINS]
Reduce Output Operator [RS_1195]
PartitionCols:_col0, _col1, _col2, _col3, _col4
- Group By Operator [GBY_1194] (rows=7 width=200)
+ Group By Operator [GBY_1194] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
Top N Key Operator [TNK_1193] (rows=3 width=221)
keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -683,7 +683,7 @@ Stage-0
<-Reducer 22 [CONTAINS]
Reduce Output Operator [RS_1202]
PartitionCols:_col0, _col1, _col2, _col3, _col4
- Group By Operator [GBY_1201] (rows=7 width=200)
+ Group By Operator [GBY_1201] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
Top N Key Operator [TNK_1200] (rows=3 width=221)
keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -866,7 +866,7 @@ Stage-0
<-Reducer 6 [CONTAINS]
Reduce Output Operator [RS_1188]
PartitionCols:_col0, _col1, _col2, _col3, _col4
- Group By Operator [GBY_1187] (rows=7 width=200)
+ Group By Operator [GBY_1187] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
Top N Key Operator [TNK_1186] (rows=3 width=221)
keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
index fd8eb9b..66c4f39 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
@@ -297,21 +297,21 @@ Stage-0
Stage-1
Reducer 9 vectorized
File Output Operator [FS_1335]
- Limit [LIM_1334] (rows=7 width=192)
+ Limit [LIM_1334] (rows=7 width=212)
Number of rows:100
- Select Operator [SEL_1333] (rows=7 width=192)
+ Select Operator [SEL_1333] (rows=7 width=212)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 8 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_1332]
- Select Operator [SEL_1331] (rows=7 width=192)
+ Select Operator [SEL_1331] (rows=7 width=212)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Group By Operator [GBY_1330] (rows=7 width=200)
+ Group By Operator [GBY_1330] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
<-Union 7 [SIMPLE_EDGE]
<-Reducer 16 [CONTAINS]
Reduce Output Operator [RS_1177]
PartitionCols:_col0, _col1, _col2, _col3, _col4
- Group By Operator [GBY_1176] (rows=7 width=200)
+ Group By Operator [GBY_1176] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
Top N Key Operator [TNK_1175] (rows=3 width=221)
keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -687,7 +687,7 @@ Stage-0
<-Reducer 22 [CONTAINS]
Reduce Output Operator [RS_1184]
PartitionCols:_col0, _col1, _col2, _col3, _col4
- Group By Operator [GBY_1183] (rows=7 width=200)
+ Group By Operator [GBY_1183] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
Top N Key Operator [TNK_1182] (rows=3 width=221)
keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -874,7 +874,7 @@ Stage-0
<-Reducer 6 [CONTAINS]
Reduce Output Operator [RS_1170]
PartitionCols:_col0, _col1, _col2, _col3, _col4
- Group By Operator [GBY_1169] (rows=7 width=200)
+ Group By Operator [GBY_1169] (rows=7 width=220)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
Top N Key Operator [TNK_1168] (rows=3 width=221)
keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100