You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/27 03:51:58 UTC
[orc] branch branch-1.7 updated: ORC-848: Recycle Internal Buffer in StringHashTableDictionary (#751)
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new c4dcbdb ORC-848: Recycle Internal Buffer in StringHashTableDictionary (#751)
c4dcbdb is described below
commit c4dcbdb0bfec86d29287dbdc4834c0feacbd6a3f
Author: belugabehr <12...@users.noreply.github.com>
AuthorDate: Thu Aug 26 23:50:54 2021 -0400
ORC-848: Recycle Internal Buffer in StringHashTableDictionary (#751)
### What changes were proposed in this pull request?
When clearing a StringHashTableDictionary, re-use the existing internal buffer.
### Why are the changes needed?
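Minor performance improvement: clearing the dictionary no longer allocates a new hash bucket array; the existing array is reused.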
### How was this patch tested?
No change in functionality; the change is covered by the existing unit tests.
(cherry picked from commit aa7eee36e40333b96d20aa8ebf5b72b38d757f92)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../apache/orc/impl/StringHashTableDictionary.java | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java b/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
index 1a0564a..8faa5ae 100644
--- a/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
+++ b/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
@@ -74,18 +74,24 @@ public class StringHashTableDictionary implements Dictionary {
this.capacity = initialCapacity;
this.loadFactor = loadFactor;
this.keyOffsets = new DynamicIntArray(initialCapacity);
- initHashBuckets(initialCapacity);
+ initializeHashBuckets();
this.threshold = (int)Math.min(initialCapacity * loadFactor, MAX_ARRAY_SIZE + 1);
}
- private void initHashBuckets(int capacity) {
- DynamicIntArray[] buckets = new DynamicIntArray[capacity];
- for (int i = 0; i < capacity; i++) {
+ /**
+ * Initialize the hash buckets. This will create the hash buckets if they have
+ * not already been created; otherwise the existing buckets will be overwritten
+ * (cleared).
+ */
+ private void initializeHashBuckets() {
+ final DynamicIntArray[] newBuckets =
+ (this.hashBuckets == null) ? new DynamicIntArray[this.capacity] : this.hashBuckets;
+ for (int i = 0; i < this.capacity; i++) {
// We don't need large bucket: If we have more than a handful of collisions,
// then the table is too small or the function isn't good.
- buckets[i] = createBucket();
+ newBuckets[i] = createBucket();
}
- hashBuckets = buckets;
+ this.hashBuckets = newBuckets;
}
private DynamicIntArray createBucket() {
@@ -111,7 +117,7 @@ public class StringHashTableDictionary implements Dictionary {
public void clear() {
byteArray.clear();
keyOffsets.clear();
- initHashBuckets(this.capacity);
+ initializeHashBuckets();
}
@Override