You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by rb...@apache.org on 2020/07/29 03:43:26 UTC
[hive] branch master updated: HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
This is an automated email from the ASF dual-hosted git repository.
rbalamohan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f7be5fe HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
f7be5fe is described below
commit f7be5fe240e5e057edff9c14f03f6cc17367bc92
Author: rbalamohan <rb...@apache.org>
AuthorDate: Wed Jul 29 09:13:11 2020 +0530
HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
---
.../hive/ql/exec/vector/VectorAggregationBufferRow.java | 4 ++++
.../hadoop/hive/ql/exec/vector/VectorGroupByOperator.java | 4 +++-
.../hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java | 11 +++++++++++
3 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
index a7ef154..a265e52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
@@ -89,4 +89,8 @@ public class VectorAggregationBufferRow {
public void incrementAccessCount() {
accessed++;
}
+
+ public void resetAccessCount() {
+ accessed = 0;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 85535f5..02864d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -599,8 +599,10 @@ public class VectorGroupByOperator extends Operator<GroupByDesc>
while(iter.hasNext()) {
Map.Entry<KeyWrapper, VectorAggregationBufferRow> pair = iter.next();
if (!all && avgAccess >= 1) {
- // Retain entries when access pattern is > than average access
if (pair.getValue().getAccessCount() > avgAccess) {
+ // resetting to give chance for other entries
+ totalAccessCount -= pair.getValue().getAccessCount();
+ pair.getValue().resetAccessCount();
continue;
}
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index c22a833..d6a8548 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -692,11 +692,18 @@ public class TestVectorGroupByOperator {
// This processing would trigger flush
for (VectorizedRowBatch unit: data) {
+ long zeroAccessBeforeFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
vgo.process(unit, 0);
long freqElementsAfterFlush = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
assertTrue("After flush: " + freqElementsAfterFlush + ", before flush: " + numElementsToBeRetained,
(freqElementsAfterFlush >= numElementsToBeRetained));
+
+ // ensure that freq elements are reset for providing chance for others
+ long zeroAccessAfterFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
+ assertTrue("After flush: " + zeroAccessAfterFlush + ", before flush: " + zeroAccessBeforeFlush,
+ (zeroAccessAfterFlush > zeroAccessBeforeFlush));
+
break;
}
vgo.close(false);
@@ -706,6 +713,10 @@ public class TestVectorGroupByOperator {
return aggMap.values().stream().filter(v -> (v.getAccessCount() > avgAccess)).count();
}
+ long getElementsWithZeroAccess(Map<KeyWrapper, VectorAggregationBufferRow> aggMap) {
+ return aggMap.values().stream().filter(v -> (v.getAccessCount() == 0)).count();
+ }
+
@Test
public void testMaxHTEntriesFlush() throws HiveException {