You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/27 03:51:58 UTC

[orc] branch branch-1.7 updated: ORC-848: Recycle Internal Buffer in StringHashTableDictionary (#751)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new c4dcbdb  ORC-848: Recycle Internal Buffer in StringHashTableDictionary (#751)
c4dcbdb is described below

commit c4dcbdb0bfec86d29287dbdc4834c0feacbd6a3f
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Thu Aug 26 23:50:54 2021 -0400

    ORC-848: Recycle Internal Buffer in StringHashTableDictionary (#751)
    
    ### What changes were proposed in this pull request?
    When clearing a StringHashTableDictionary, re-use the existing internal buffer.
    
    ### Why are the changes needed?
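    Minor performance improvement: clear() reuses the existing hash bucket array instead of allocating a new one each time.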
    
    ### How was this patch tested?
    No change in functionality; the patch is covered by the existing unit tests.
    
    (cherry picked from commit aa7eee36e40333b96d20aa8ebf5b72b38d757f92)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../apache/orc/impl/StringHashTableDictionary.java   | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java b/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
index 1a0564a..8faa5ae 100644
--- a/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
+++ b/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
@@ -74,18 +74,24 @@ public class StringHashTableDictionary implements Dictionary {
     this.capacity = initialCapacity;
     this.loadFactor = loadFactor;
     this.keyOffsets = new DynamicIntArray(initialCapacity);
-    initHashBuckets(initialCapacity);
+    initializeHashBuckets();
     this.threshold = (int)Math.min(initialCapacity * loadFactor, MAX_ARRAY_SIZE + 1);
   }
 
-  private void initHashBuckets(int capacity) {
-    DynamicIntArray[] buckets = new DynamicIntArray[capacity];
-    for (int i = 0; i < capacity; i++) {
+  /**
+   * Initialize the hash buckets. This will create the hash buckets if they have
+   * not already been created; otherwise the existing buckets will be overwritten
+   * (cleared).
+   */
+  private void initializeHashBuckets() {
+    final DynamicIntArray[] newBuckets =
+        (this.hashBuckets == null) ? new DynamicIntArray[this.capacity] : this.hashBuckets;
+    for (int i = 0; i < this.capacity; i++) {
       // We don't need large bucket: If we have more than a handful of collisions,
       // then the table is too small or the function isn't good.
-      buckets[i] = createBucket();
+      newBuckets[i] = createBucket();
     }
-    hashBuckets = buckets;
+    this.hashBuckets = newBuckets;
   }
 
   private DynamicIntArray createBucket() {
@@ -111,7 +117,7 @@ public class StringHashTableDictionary implements Dictionary {
   public void clear() {
     byteArray.clear();
     keyOffsets.clear();
-    initHashBuckets(this.capacity);
+    initializeHashBuckets();
   }
 
   @Override