You are viewing a plain text version of this content. The canonical (HTML) version is available via the mailing list archive.
Posted to commits@kafka.apache.org by cm...@apache.org on 2021/03/18 17:02:31 UTC
[kafka] branch 2.8 updated: MINOR: Fix BaseHashTable sizing (#10334)
This is an automated email from the ASF dual-hosted git repository.
cmccabe pushed a commit to branch 2.8
in repository https://gitbox.apache.org/repos/asf/kafka.git
The following commit(s) were added to refs/heads/2.8 by this push:
new 37a54a0 MINOR: Fix BaseHashTable sizing (#10334)
37a54a0 is described below
commit 37a54a07d0baaa3ff29b5f32b36b0951b2efab4e
Author: Colin Patrick McCabe <cm...@confluent.io>
AuthorDate: Thu Mar 18 09:58:49 2021 -0700
MINOR: Fix BaseHashTable sizing (#10334)
The array backing BaseHashTable is intended to be sized as a power of
two. Due to a bug, the initial array size was calculated incorrectly
in some cases.
Also make the maximum array size the largest possible 31-bit power of
two. Previously it was a smaller size but this was due to a typo.
Reviewers: Ismael Juma <is...@juma.me.uk>, Jose Sancio <js...@gmail.com>, Chia-Ping Tsai <ch...@gmail.com>
---
.../org/apache/kafka/timeline/BaseHashTable.java | 30 +++++++++++++++++-----
.../apache/kafka/timeline/BaseHashTableTest.java | 20 +++++++++++++++
2 files changed, 43 insertions(+), 7 deletions(-)
diff --git a/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java b/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java
index 7183528..0531546 100644
--- a/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java
+++ b/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java
@@ -40,14 +40,14 @@ class BaseHashTable<T> {
private final static double MAX_LOAD_FACTOR = 0.75f;
/**
- * The natural log of 2
+ * The minimum number of slots we can have in the hash table.
*/
- private final static double LN_2 = Math.log(2);
+ final static int MIN_CAPACITY = 2;
/**
* The maximum number of slots we can have in the hash table.
*/
- private final static int MAX_CAPACITY = 0x4000000;
+ final static int MAX_CAPACITY = 1 << 30;
private Object[] elements;
private int size = 0;
@@ -56,12 +56,28 @@ class BaseHashTable<T> {
this.elements = new Object[expectedSizeToCapacity(expectedSize)];
}
+ /**
+ * Calculate the capacity we should provision, given the expected size.
+ *
+ * Our capacity must always be a power of 2, and never less than 2 or more
+ * than MAX_CAPACITY. We use 64-bit numbers here to avoid overflow
+ * concerns.
+ */
static int expectedSizeToCapacity(int expectedSize) {
- if (expectedSize <= 1) {
- return 2;
+ long minCapacity = (long) Math.ceil((float) expectedSize / MAX_LOAD_FACTOR);
+ return Math.max(MIN_CAPACITY,
+ (int) Math.min(MAX_CAPACITY, roundUpToPowerOfTwo(minCapacity)));
+ }
+
+ private static long roundUpToPowerOfTwo(long i) {
+ if (i <= 0) {
+ return 0;
+ } else if (i > (1L << 62)) {
+ throw new ArithmeticException("There are no 63-bit powers of 2 higher than " +
+ "or equal to " + i);
+ } else {
+ return 1L << -Long.numberOfLeadingZeros(i - 1);
}
- double sizeToFit = expectedSize / MAX_LOAD_FACTOR;
- return (int) Math.min(MAX_CAPACITY, Math.ceil(Math.log(sizeToFit) / LN_2));
}
final int baseSize() {
diff --git a/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java b/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java
index 11f774c..a73357c 100644
--- a/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java
+++ b/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java
@@ -121,4 +121,24 @@ public class BaseHashTableTest {
assertEquals(Integer.valueOf(i), table.baseRemove(Integer.valueOf(i)));
}
}
+
+ @Test
+ public void testExpectedSizeToCapacity() {
+ assertEquals(2, BaseHashTable.expectedSizeToCapacity(Integer.MIN_VALUE));
+ assertEquals(2, BaseHashTable.expectedSizeToCapacity(-123));
+ assertEquals(2, BaseHashTable.expectedSizeToCapacity(0));
+ assertEquals(2, BaseHashTable.expectedSizeToCapacity(1));
+ assertEquals(4, BaseHashTable.expectedSizeToCapacity(2));
+ assertEquals(4, BaseHashTable.expectedSizeToCapacity(3));
+ assertEquals(8, BaseHashTable.expectedSizeToCapacity(4));
+ assertEquals(16, BaseHashTable.expectedSizeToCapacity(12));
+ assertEquals(32, BaseHashTable.expectedSizeToCapacity(13));
+ assertEquals(0x2000000, BaseHashTable.expectedSizeToCapacity(0x1010400));
+ assertEquals(0x4000000, BaseHashTable.expectedSizeToCapacity(0x2000000));
+ assertEquals(0x4000000, BaseHashTable.expectedSizeToCapacity(0x2000001));
+ assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(BaseHashTable.MAX_CAPACITY));
+ assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(BaseHashTable.MAX_CAPACITY + 1));
+ assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(Integer.MAX_VALUE - 1));
+ assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(Integer.MAX_VALUE));
+ }
}