You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kafka.apache.org by cm...@apache.org on 2021/03/18 17:02:31 UTC

[kafka] branch 2.8 updated: MINOR: Fix BaseHashTable sizing (#10334)

This is an automated email from the ASF dual-hosted git repository.

cmccabe pushed a commit to branch 2.8
in repository https://gitbox.apache.org/repos/asf/kafka.git


The following commit(s) were added to refs/heads/2.8 by this push:
     new 37a54a0  MINOR: Fix BaseHashTable sizing (#10334)
37a54a0 is described below

commit 37a54a07d0baaa3ff29b5f32b36b0951b2efab4e
Author: Colin Patrick McCabe <cm...@confluent.io>
AuthorDate: Thu Mar 18 09:58:49 2021 -0700

    MINOR: Fix BaseHashTable sizing (#10334)
    
    The array backing BaseHashTable is intended to be sized as a power of
    two.  Due to a bug, the initial array size was calculated incorrectly
    in some cases.
    
    Also make the maximum array size the largest possible 31-bit power of
    two.  Previously it was a smaller size but this was due to a typo.
    
    Reviewers: Ismael Juma <is...@juma.me.uk>, Jose Sancio <js...@gmail.com>, Chia-Ping Tsai <ch...@gmail.com>
---
 .../org/apache/kafka/timeline/BaseHashTable.java   | 30 +++++++++++++++++-----
 .../apache/kafka/timeline/BaseHashTableTest.java   | 20 +++++++++++++++
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java b/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java
index 7183528..0531546 100644
--- a/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java
+++ b/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java
@@ -40,14 +40,14 @@ class BaseHashTable<T> {
     private final static double MAX_LOAD_FACTOR = 0.75f;
 
     /**
-     * The natural log of 2
+     * The minimum number of slots we can have in the hash table.
      */
-    private final static double LN_2 = Math.log(2);
+    final static int MIN_CAPACITY = 2;
 
     /**
      * The maximum number of slots we can have in the hash table.
      */
-    private final static int MAX_CAPACITY = 0x4000000;
+    final static int MAX_CAPACITY = 1 << 30;
 
     private Object[] elements;
     private int size = 0;
@@ -56,12 +56,28 @@ class BaseHashTable<T> {
         this.elements = new Object[expectedSizeToCapacity(expectedSize)];
     }
 
+    /**
+     * Calculate the capacity we should provision, given the expected size.
+     *
+     * Our capacity must always be a power of 2, and never less than 2 or more
+     * than MAX_CAPACITY.  We use 64-bit numbers here to avoid overflow
+     * concerns.
+     */
     static int expectedSizeToCapacity(int expectedSize) {
-        if (expectedSize <= 1) {
-            return 2;
+        long minCapacity = (long) Math.ceil((float) expectedSize / MAX_LOAD_FACTOR);
+        return Math.max(MIN_CAPACITY,
+                (int) Math.min(MAX_CAPACITY, roundUpToPowerOfTwo(minCapacity)));
+    }
+
+    private static long roundUpToPowerOfTwo(long i) {
+        if (i <= 0) {
+            return 0;
+        } else if (i > (1L << 62)) {
+            throw new ArithmeticException("There are no 63-bit powers of 2 higher than " +
+                    "or equal to " + i);
+        } else {
+            return 1L << -Long.numberOfLeadingZeros(i - 1);
         }
-        double sizeToFit = expectedSize / MAX_LOAD_FACTOR;
-        return (int) Math.min(MAX_CAPACITY, Math.ceil(Math.log(sizeToFit) / LN_2));
     }
 
     final int baseSize() {
diff --git a/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java b/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java
index 11f774c..a73357c 100644
--- a/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java
+++ b/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java
@@ -121,4 +121,24 @@ public class BaseHashTableTest {
             assertEquals(Integer.valueOf(i), table.baseRemove(Integer.valueOf(i)));
         }
     }
+
+    @Test
+    public void testExpectedSizeToCapacity() {
+        assertEquals(2, BaseHashTable.expectedSizeToCapacity(Integer.MIN_VALUE));
+        assertEquals(2, BaseHashTable.expectedSizeToCapacity(-123));
+        assertEquals(2, BaseHashTable.expectedSizeToCapacity(0));
+        assertEquals(2, BaseHashTable.expectedSizeToCapacity(1));
+        assertEquals(4, BaseHashTable.expectedSizeToCapacity(2));
+        assertEquals(4, BaseHashTable.expectedSizeToCapacity(3));
+        assertEquals(8, BaseHashTable.expectedSizeToCapacity(4));
+        assertEquals(16, BaseHashTable.expectedSizeToCapacity(12));
+        assertEquals(32, BaseHashTable.expectedSizeToCapacity(13));
+        assertEquals(0x2000000, BaseHashTable.expectedSizeToCapacity(0x1010400));
+        assertEquals(0x4000000, BaseHashTable.expectedSizeToCapacity(0x2000000));
+        assertEquals(0x4000000, BaseHashTable.expectedSizeToCapacity(0x2000001));
+        assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(BaseHashTable.MAX_CAPACITY));
+        assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(BaseHashTable.MAX_CAPACITY + 1));
+        assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(Integer.MAX_VALUE - 1));
+        assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(Integer.MAX_VALUE));
+    }
 }