You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by rb...@apache.org on 2020/07/29 03:43:26 UTC

[hive] branch master updated: HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)

This is an automated email from the ASF dual-hosted git repository.

rbalamohan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f7be5fe  HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
f7be5fe is described below

commit f7be5fe240e5e057edff9c14f03f6cc17367bc92
Author: rbalamohan <rb...@apache.org>
AuthorDate: Wed Jul 29 09:13:11 2020 +0530

    HIVE-23917: Reset key access count during eviction in VectorGroupByOperator (#1306)
---
 .../hive/ql/exec/vector/VectorAggregationBufferRow.java       |  4 ++++
 .../hadoop/hive/ql/exec/vector/VectorGroupByOperator.java     |  4 +++-
 .../hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java | 11 +++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
index a7ef154..a265e52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
@@ -89,4 +89,8 @@ public class VectorAggregationBufferRow {
   public void incrementAccessCount() {
     accessed++;
   }
+
+  public void resetAccessCount() {
+    accessed = 0;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 85535f5..02864d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -599,8 +599,10 @@ public class VectorGroupByOperator extends Operator<GroupByDesc>
       while(iter.hasNext()) {
         Map.Entry<KeyWrapper, VectorAggregationBufferRow> pair = iter.next();
         if (!all && avgAccess >= 1) {
-          // Retain entries when access pattern is > than average access
           if (pair.getValue().getAccessCount() > avgAccess) {
+            // Retain this frequently accessed entry, but reset its access count to give other entries a chance
+            totalAccessCount -= pair.getValue().getAccessCount();
+            pair.getValue().resetAccessCount();
             continue;
           }
         }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index c22a833..d6a8548 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -692,11 +692,18 @@ public class TestVectorGroupByOperator {
 
     // This processing would trigger flush
     for (VectorizedRowBatch unit: data) {
+      long zeroAccessBeforeFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
       vgo.process(unit,  0);
       long freqElementsAfterFlush = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
 
       assertTrue("After flush: " + freqElementsAfterFlush + ", before flush: " + numElementsToBeRetained,
           (freqElementsAfterFlush >= numElementsToBeRetained));
+
+      // Ensure the access counts of frequent elements were reset, giving other entries a chance
+      long zeroAccessAfterFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
+      assertTrue("After flush: " + zeroAccessAfterFlush + ", before flush: " + zeroAccessBeforeFlush,
+          (zeroAccessAfterFlush > zeroAccessBeforeFlush));
+
       break;
     }
     vgo.close(false);
@@ -706,6 +713,10 @@ public class TestVectorGroupByOperator {
     return aggMap.values().stream().filter(v -> (v.getAccessCount() > avgAccess)).count();
   }
 
+  long getElementsWithZeroAccess(Map<KeyWrapper, VectorAggregationBufferRow> aggMap) {
+    return aggMap.values().stream().filter(v -> (v.getAccessCount() == 0)).count();
+  }
+
   @Test
   public void testMaxHTEntriesFlush() throws HiveException {