You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2020/02/27 20:48:21 UTC

[commons-collections] 03/03: Compute the bit index into a Bloom filter using bit shifts.

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-collections.git

commit 8b4ecbc084eaf8e61f002612f7ca156fcc1e107e
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Tue Feb 25 00:23:27 2020 +0000

    Compute the bit index into a Bloom filter using bit shifts.
    
    Removes the use of Math.floorMod and integer division.
    
    Operations have been put into a class for reuse among filters.
---
 .../bloomfilter/AbstractBloomFilter.java           |  6 +-
 .../bloomfilter/BloomFilterIndexer.java            | 81 ++++++++++++++++++++++
 .../bloomfilter/HasherBloomFilter.java             |  9 ++-
 .../bloomfilter/BloomFilterIndexerTest.java        | 54 +++++++++++++++
 4 files changed, 142 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
index 9bbeb43..8a6b948 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
@@ -117,9 +117,9 @@ public abstract class AbstractBloomFilter implements BloomFilter {
         final OfInt iter = hasher.getBits(shape);
         while (iter.hasNext()) {
             final int idx = iter.nextInt();
-            final int buffIdx = idx / Long.SIZE;
-            final int pwr = Math.floorMod(idx, Long.SIZE);
-            final long buffOffset = 1L << pwr;
+            BloomFilterIndexer.checkPositive(idx);
+            final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
+            final long buffOffset = BloomFilterIndexer.getLongBit(idx);
             if ((buff[buffIdx] & buffOffset) == 0) {
                 return false;
             }
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java
new file mode 100644
index 0000000..9bed48b
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/**
+ * Contains functions to convert {@code int} indices into Bloom filter bit positions.
+ */
+final class BloomFilterIndexer {
+    /** A bit shift to apply to an integer to divided by 64 (2^6). */
+    private static final int DIVIDE_BY_64 = 6;
+
+    /** Do not instantiate. */
+    private BloomFilterIndexer() {}
+
+    /**
+     * Check the index is positive.
+     *
+     * @param bitIndex the bit index
+     * @throws IndexOutOfBoundsException if the index is not positive
+     */
+    static void checkPositive(int bitIndex) {
+        if (bitIndex < 0) {
+            throw new IndexOutOfBoundsException("Negative bitIndex: " + bitIndex);
+        }
+    }
+
+    /**
+     * Gets the filter index for the specified bit index assuming the filter is using 64-bit longs
+     * to store bits starting at index 0.
+     *
+     * <p>The index is assumed to be positive. For a positive index the result will match
+     * {@code bitIndex / 64}.
+     *
+     * <p>The divide is performed using bit shifts. If the input is negative the behaviour
+     * is not defined.
+     *
+     * @param bitIndex the bit index (assumed to be positive)
+     * @return the filter index
+     * @see #checkPositive(int)
+     */
+    static int getLongIndex(int bitIndex) {
+        // Use a signed shift. Any negative index will produce a negative value
+        // by sign-extension and if used as an index into an array it will throw an exception.
+        return bitIndex >> DIVIDE_BY_64;
+    }
+
+    /**
+     * Gets the filter bit mask for the specified bit index assuming the filter is using 64-bit
+     * longs to store bits starting at index 0. The returned value is a {@code long} with only
+     * 1 bit set.
+     *
+     * <p>The index is assumed to be positive. For a positive index the result will match
+     * {@code 1L << (bitIndex % 64)}.
+     *
+     * <p>If the input is negative the behaviour is not defined.
+     *
+     * @param bitIndex the bit index (assumed to be positive)
+     * @return the filter bit
+     * @see #checkPositive(int)
+     */
+    static long getLongBit(int bitIndex) {
+        // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
+        // using 0x3f (63) or compute bitIndex % 64.
+        // If the index is negative the shift will be in the opposite direction.
+        return 1L << bitIndex;
+    }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
index 8390144..e6436a8 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
@@ -101,11 +101,10 @@ public class HasherBloomFilter extends AbstractBloomFilter {
         final long[] result = new long[n];
         final OfInt iter = hasher.getBits(hasher.getShape());
         iter.forEachRemaining((IntConsumer) idx -> {
-            long buff = result[idx / Long.SIZE];
-            final long pwr = Math.floorMod(idx, Long.SIZE);
-            final long buffOffset = 1L << pwr;
-            buff |= buffOffset;
-            result[idx / Long.SIZE] = buff;
+            BloomFilterIndexer.checkPositive(idx);
+            final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
+            final long buffOffset = BloomFilterIndexer.getLongBit(idx);
+            result[buffIdx] |= buffOffset;
         });
 
         int limit = result.length;
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java
new file mode 100644
index 0000000..a552706
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Tests for the {@link BloomFilterIndexer}.
+ */
+public class BloomFilterIndexerTest {
+
+    @Test(expected = IndexOutOfBoundsException.class)
+    public void testCheckPositiveThrows() {
+        BloomFilterIndexer.checkPositive(-1);
+    }
+
+    @Test
+    public void testGetLongIndex() {
+        final Random rng = ThreadLocalRandom.current();
+        for (int i = 0; i < 10; i++) {
+            // Random positive number
+            final int index = rng.nextInt() >>> 1;
+            Assert.assertEquals(index / Long.SIZE, BloomFilterIndexer.getLongIndex(index));
+        }
+    }
+
+    @Test
+    public void testGetLongBit() {
+        final Random rng = ThreadLocalRandom.current();
+        for (int i = 0; i < 10; i++) {
+            // Random positive number
+            final int index = rng.nextInt() >>> 1;
+            Assert.assertEquals(1L << (index % Long.SIZE), BloomFilterIndexer.getLongBit(index));
+        }
+    }
+}