You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by pg...@apache.org on 2021/07/29 10:01:56 UTC
[orc] branch main updated: ORC-842: Remove newKey from
StringHashTableDictionary (#748)
This is an automated email from the ASF dual-hosted git repository.
pgaref pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 8458715 ORC-842: Remove newKey from StringHashTableDictionary (#748)
8458715 is described below
commit 8458715c12bcddb35b0bc265c3b240081fbeb270
Author: David Mollitor <dm...@apache.org>
AuthorDate: Mon Jul 12 12:28:07 2021 -0400
ORC-842: Remove newKey from StringHashTableDictionary (#748)
When adding keys to `StringHashTableDictionary`, do not copy data into intermediate container.
Performance.
No changes to functionality. Use existing unit tests. Existing unit test was updated to use new internal mechanism for testing.
Change-Id: Ie04c940f13570d2d77df2bdc5621c944b7879823
Signed-off-by: Panagiotis Garefalakis <pg...@apache.org>
---
.../apache/orc/impl/StringHashTableDictionary.java | 38 +++++++++++++---------
.../orc/impl/TestStringHashTableDictionary.java | 6 ++--
2 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java b/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
index d940ef8..80ad379 100644
--- a/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
+++ b/java/core/src/java/org/apache/orc/impl/StringHashTableDictionary.java
@@ -37,8 +37,6 @@ public class StringHashTableDictionary implements Dictionary {
// containing starting offset of the key (in byte) in the byte array.
private final DynamicIntArray keyOffsets;
- private final Text newKey = new Text();
-
private DynamicIntArray[] hashBuckets;
private int capacity;
@@ -124,31 +122,29 @@ public class StringHashTableDictionary implements Dictionary {
return DictionaryUtils.writeToTextInternal(out, position, this.keyOffsets, this.byteArray);
}
- @Override
- public int add(byte[] bytes, int offset, int length) {
- newKey.set(bytes, offset, length);
- return add(newKey);
+ public int add(Text text) {
+ return add(text.getBytes(), 0, text.getLength());
}
- public int add(Text text) {
+ @Override
+ public int add(final byte[] bytes, final int offset, final int length) {
resizeIfNeeded();
- newKey.set(text);
- int index = getIndex(text);
+ int index = getIndex(bytes, offset, length);
DynamicIntArray candidateArray = hashBuckets[index];
Text tmpText = new Text();
for (int i = 0; i < candidateArray.size(); i++) {
- getText(tmpText, candidateArray.get(i));
- if (tmpText.equals(newKey)) {
- return candidateArray.get(i);
+ final int candidateIndex = candidateArray.get(i);
+ getText(tmpText, candidateIndex);
+ if (tmpText.compareTo(bytes, offset, length) == 0) {
+ return candidateIndex;
}
}
// if making it here, it means no match.
- int len = newKey.getLength();
int currIdx = keyOffsets.size();
- keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
+ keyOffsets.add(byteArray.add(bytes, offset, length));
candidateArray.add(currIdx);
return currIdx;
}
@@ -172,10 +168,20 @@ public class StringHashTableDictionary implements Dictionary {
/**
* Compute the hash value and find the corresponding index.
- *
*/
int getIndex(Text text) {
- return Math.floorMod(text.hashCode(), capacity);
+ return getIndex(text.getBytes(), 0, text.getLength());
+ }
+
+ /**
+ * Compute the hash value and find the corresponding index.
+ */
+ int getIndex(final byte[] bytes, final int offset, final int length) {
+ int hash = 1;
+ for (int i = offset; i < offset + length; i++) {
+ hash = (31 * hash) + bytes[i];
+ }
+ return Math.floorMod(hash, capacity);
}
// Resize the hash table, re-hash all the existing keys.
diff --git a/java/core/src/test/org/apache/orc/impl/TestStringHashTableDictionary.java b/java/core/src/test/org/apache/orc/impl/TestStringHashTableDictionary.java
index b3a5470..388a065 100644
--- a/java/core/src/test/org/apache/orc/impl/TestStringHashTableDictionary.java
+++ b/java/core/src/test/org/apache/orc/impl/TestStringHashTableDictionary.java
@@ -99,10 +99,8 @@ public class TestStringHashTableDictionary {
* this way we know the order of the traverse() method.
*/
@Override
- int getIndex(Text text) {
- String s = text.toString();
- int underscore = s.indexOf("_");
- return Integer.parseInt(text.toString().substring(0, underscore));
+ int getIndex(byte[] bytes, int offset, int length) {
+ return (char) bytes[0] - '0';
}
}