You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2022/09/10 09:03:01 UTC
[commons-collections] 01/04: Collections-763: Remove BloomFilter constructors that create initial entry
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-collections.git
commit 9a58c1bbdf7c65e71b33ab1ba406f1de5a8191df
Author: Claude Warren, Jr <cl...@aiven.io>
AuthorDate: Fri Sep 9 22:18:32 2022 +0100
Collections-763: Remove BloomFilter constructors that create initial entry
---
.../bloomfilter/ArrayCountingBloomFilter.java | 33 -----
.../collections4/bloomfilter/BloomFilter.java | 41 +++++-
.../bloomfilter/CountingBloomFilter.java | 137 +++++++++++++++++++-
.../bloomfilter/SimpleBloomFilter.java | 79 ++----------
.../bloomfilter/SparseBloomFilter.java | 82 ++----------
.../collections4/bloomfilter/package-info.java | 90 +++++++-------
.../bloomfilter/AbstractBloomFilterTest.java | 129 ++++++++++++-------
.../AbstractCountingBloomFilterTest.java | 68 +++++++---
.../bloomfilter/AbstractHasherTest.java | 8 +-
.../bloomfilter/ArrayCountingBloomFilterTest.java | 24 ----
...ultBitMapProducerTest.java => ArrayHasher.java} | 42 ++++---
.../BitMapProducerFromSimpleBloomFilterTest.java | 4 +-
.../BitMapProducerFromSparseBloomFilterTest.java | 4 +-
.../bloomfilter/DefaultBitMapProducerTest.java | 42 ++++++-
.../bloomfilter/DefaultBloomFilterTest.java | 86 +++----------
.../bloomfilter/DefaultIndexProducerTest.java | 113 +++++++++++++++++
.../IndexProducerFromSimpleBloomFilterTest.java | 6 +-
.../IndexProducerFromSparseBloomFilterTest.java | 7 +-
.../bloomfilter/SetOperationsTest.java | 138 +++++++++++----------
.../bloomfilter/SimpleBloomFilterTest.java | 60 ---------
.../bloomfilter/SparseBloomFilterTest.java | 63 +---------
21 files changed, 662 insertions(+), 594 deletions(-)
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
index ba13e6645..e76ddda44 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
@@ -125,39 +125,6 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
}
- @Override
- public boolean merge(final BloomFilter other) {
- Objects.requireNonNull(other, "other");
- try {
- return add(BitCountProducer.from(other));
- } catch (IndexOutOfBoundsException e) {
- throw new IllegalArgumentException( e );
- }
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- Objects.requireNonNull(hasher, "hasher");
- try {
- return add(BitCountProducer.from(hasher.uniqueIndices(shape)));
- } catch (IndexOutOfBoundsException e) {
- throw new IllegalArgumentException(
- String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
- }
- }
-
- @Override
- public boolean remove(final BloomFilter other) {
- Objects.requireNonNull(other, "other");
- return subtract(BitCountProducer.from(other));
- }
-
- @Override
- public boolean remove(final Hasher hasher) {
- Objects.requireNonNull(hasher, "hasher");
- return subtract(BitCountProducer.from(hasher.uniqueIndices(shape)));
- }
-
@Override
public boolean add(final BitCountProducer other) {
Objects.requireNonNull(other, "other");
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
index 9471e5dcd..f939538b0 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
@@ -135,7 +135,9 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
* @param other The bloom filter to merge into this one.
* @return true if the merge was successful
*/
- boolean merge(BloomFilter other);
+ default boolean merge(BloomFilter other) {
+ return (characteristics() & SPARSE) != 0 ? merge((IndexProducer) other ) : merge((BitMapProducer) other);
+ }
/**
* Merges the specified hasher into this Bloom filter. Specifically all
@@ -143,7 +145,7 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
*
* <p><em>Note: This method should return {@code true} even if no additional bit indexes were
* enabled. A {@code false} result indicates that this filter may or may not contain
- * the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like
+ * the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like
* counting Bloom filters.</p>
*
* @param hasher The hasher to merge.
@@ -151,12 +153,39 @@ public interface BloomFilter extends IndexProducer, BitMapProducer {
*/
default boolean merge(Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
- Shape shape = getShape();
- // create the Bloom filter that is most likely to merge quickly with this one
- BloomFilter result = (characteristics() & SPARSE) != 0 ? new SparseBloomFilter(shape, hasher) : new SimpleBloomFilter(shape, hasher);
- return merge(result);
+ return merge(hasher.indices(getShape()));
}
+ /**
+ * Merges the specified IndexProducer into this Bloom filter. Specifically all
+ * bit indexes that are identified by the {@code producer} will be enabled in this filter.
+ *
+ * <p><em>Note: This method should return {@code true} even if no additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of
+ * the {@code producer}.</em> This state may occur in complex Bloom filter implementations like
+ * counting Bloom filters.</p>
+ *
+ * @param indexProducer The IndexProducer to merge.
+ * @return true if the merge was successful
+ * @throws IllegalArgumentException if producer sends illegal value.
+ */
+ boolean merge(IndexProducer indexProducer);
+
+ /**
+ * Merges the specified hasher into this Bloom filter. Specifically all
+ * bit indexes that are identified by the {@code producer} will be enabled in this filter.
+ *
+ * <p><em>Note: This method should return {@code true} even if no additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes
+ * enabled in the {@code producer}.</em> This state may occur in complex Bloom filter implementations like
+ * counting Bloom filters.</p>
+ *
+ * @param bitMapProducer The producer to merge.
+ * @return true if the merge was successful
+ * @throws IllegalArgumentException if producer sends illegal value.
+ */
+ boolean merge(BitMapProducer bitMapProducer);
+
// Counting Operations
/**
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index 186c284e9..1d6f65cf5 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -16,6 +16,8 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import java.util.Objects;
+
/**
* The interface that describes a Bloom filter that associates a count with each
* bit index to allow reversal of merge operations with remove operations.
@@ -77,6 +79,84 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
// Modification Operations
+ /**
+ * Merges the specified Bloom filter into this Bloom filter.
+ *
+ * <p>Specifically: all counts for the indexes identified by the {@code other} filter will be incremented by 1,</p>
+ *
+ * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
+ * IndexProducer.</p>
+ *
+ * <p>This method will return {@code true} if the filter is valid after the operation.</p>
+ *
+ * @param other the other Bloom filter
+ * @return {@code true} if the removal was successful and the state is valid
+ * @see #isValid()
+ * @see #add(BitCountProducer)
+ */
+ default boolean merge(final BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return merge((IndexProducer) other);
+ }
+
+ /**
+ * Merges the specified Hasher into this Bloom filter.
+ *
+ * <p>Specifically: all counts for the unique indexes identified by the {@code hasher} will be incremented by 1,</p>
+ *
+ * <p>This method will return {@code true} if the filter is valid after the operation.</p>
+ *
+ * @param hasher the hasher
+ * @return {@code true} if the removal was successful and the state is valid
+ * @see #isValid()
+ * @see #add(BitCountProducer)
+ */
+ default boolean merge(final Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ return merge(hasher.uniqueIndices(getShape()));
+ }
+
+ /**
+ * Merges the specified index producer into this Bloom filter.
+ *
+ * <p>Specifically: all counts for the indexes identified by the {@code indexProducer} will be incremented by 1,</p>
+ *
+ * <p>This method will return {@code true} if the filter is valid after the operation.</p>
+ *
+ * <p>Note: Indices that are returned multiple times will be incremented multiple times.</p>
+ *
+ * @param indexProducer the IndexProducer
+ * @return {@code true} if the removal was successful and the state is valid
+ * @see #isValid()
+ * @see #add(BitCountProducer)
+ */
+ default boolean merge(final IndexProducer indexProducer) {
+ Objects.requireNonNull(indexProducer, "indexProducer");
+ try {
+ return add(BitCountProducer.from(indexProducer));
+ } catch (IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException(
+ String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
+ }
+ }
+
+ /**
+ * Merges the specified BitMap producer into this Bloom filter.
+ *
+ * <p>Specifically: all counts for the indexes identified by the {@code bitMapProducer} will be incremented by 1,</p>
+ *
+ * <p>This method will return {@code true} if the filter is valid after the operation.</p>
+ *
+ * @param bitMapProducer the BitMapProducer
+ * @return {@code true} if the removal was successful and the state is valid
+ * @see #isValid()
+ * @see #add(BitCountProducer)
+ */
+ default boolean merge(final BitMapProducer bitMapProducer) {
+ Objects.requireNonNull(bitMapProducer, "bitMapProducer");
+ return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
+ }
+
/**
* Removes the specified Bloom filter from this Bloom filter.
*
@@ -92,12 +172,15 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
* @see #isValid()
* @see #subtract(BitCountProducer)
*/
- boolean remove(BloomFilter other);
+ default boolean remove(final BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return remove((IndexProducer) other);
+ }
/**
- * Removes the specified hasher from the Bloom filter from this Bloom filter.
+ * Removes the unique values from the specified hasher from this Bloom filter.
*
- * <p>Specifically all counts for the indices produced by the {@code hasher} will be
+ * <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
* decremented by 1.</p>
*
* <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
@@ -110,7 +193,53 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
* @see #isValid()
* @see #subtract(BitCountProducer)
*/
- boolean remove(Hasher hasher);
+ default boolean remove(final Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ return remove(hasher.uniqueIndices(getShape()));
+ }
+
+ /**
+ * Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter.
+ *
+ * <p>Specifically all counts for the unique indices produced by the {@code hasher} will be
+ * decremented by 1.</p>
+ *
+ * <p>This method will return {@code true} if the filter is valid after the operation.</p>
+ *
+ * <p>Node: This method expects index producers that produce unique values.</p>
+ *
+ * @param indexProducer the IndexProducer to provide the indexes
+ * @return {@code true} if the removal was successful and the state is valid
+ * @see #isValid()
+ * @see #subtract(BitCountProducer)
+ */
+ default boolean remove(final IndexProducer indexProducer) {
+ Objects.requireNonNull(indexProducer, "indexProducer");
+ try {
+ return subtract(BitCountProducer.from(indexProducer));
+ } catch (IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException(
+ String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
+ }
+ }
+
+ /**
+ * Removes the specified BitMapProducer from this Bloom filter.
+ *
+ * <p>Specifically all counts for the indices produced by the {@code bitMapProducer} will be
+ * decremented by 1.</p>
+ *
+ * <p>This method will return {@code true} if the filter is valid after the operation.</p>
+ *
+ * @param bitMapProducer the BitMapProducer to provide the indexes
+ * @return {@code true} if the removal was successful and the state is valid
+ * @see #isValid()
+ * @see #subtract(BitCountProducer)
+ */
+ default boolean remove(final BitMapProducer bitMapProducer) {
+ Objects.requireNonNull(bitMapProducer, "bitMapProducer");
+ return remove(IndexProducer.fromBitMapProducer(bitMapProducer));
+ }
/**
* Adds the specified BitCountProducer to this Bloom filter.
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
index eb777f726..df5ee707b 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
@@ -55,58 +55,6 @@ public final class SimpleBloomFilter implements BloomFilter {
this.cardinality = 0;
}
- /**
- * Creates an instance that is equivalent to {@code other}.
- *
- * @param other The bloom filter to copy.
- */
- public SimpleBloomFilter(BloomFilter other) {
- Objects.requireNonNull(other, "other");
- this.shape = other.getShape();
- this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
- this.cardinality = 0;
- if ((other.characteristics() & SPARSE) != 0) {
- merge((IndexProducer) other);
- } else {
- merge((BitMapProducer) other);
- }
- }
-
- /**
- * Creates a populated instance.
- * @param shape The shape for the filter.
- * @param hasher the Hasher to initialize the filter with.
- */
- public SimpleBloomFilter(final Shape shape, Hasher hasher) {
- this(shape);
- Objects.requireNonNull(hasher, "hasher");
- merge(hasher);
- }
-
- /**
- * Creates a populated instance.
- * @param shape The shape for the filter.
- * @param indices the IndexProducer to initialize the filter with.
- * @throws IllegalArgumentException if producer sends illegal value.
- */
- public SimpleBloomFilter(final Shape shape, IndexProducer indices) {
- this(shape);
- Objects.requireNonNull(indices, "indices");
- merge(indices);
- }
-
- /**
- * Creates a populated instance.
- * @param shape The shape for the filter.
- * @param bitMaps the BitMapProducer to initialize the filter with.
- * @throws IllegalArgumentException if the producer returns too many or too few bit maps.
- */
- public SimpleBloomFilter(final Shape shape, BitMapProducer bitMaps) {
- this(shape);
- Objects.requireNonNull(bitMaps, "bitMaps");
- merge(bitMaps);
- }
-
/**
* Copy constructor for {@code copy()} use.
* @param source
@@ -139,29 +87,24 @@ public final class SimpleBloomFilter implements BloomFilter {
return new SimpleBloomFilter(this);
}
- /**
- * Performs a merge using an IndexProducer.
- * @param indexProducer the IndexProducer to merge from.
- * @throws IllegalArgumentException if producer sends illegal value.
- */
- private void merge(IndexProducer indexProducer) {
+ @Override
+ public boolean merge(IndexProducer indexProducer) {
+ Objects.requireNonNull(indexProducer, "indexProducer");
indexProducer.forEachIndex(idx -> {
if (idx < 0 || idx >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format(
- "IndexProducer should only send values in the range[0,%s]", shape.getNumberOfBits() - 1));
+ "IndexProducer should only send values in the range[0,%s)", shape.getNumberOfBits()));
}
BitMap.set(bitMap, idx);
return true;
});
cardinality = -1;
+ return true;
}
- /**
- * Performs a merge using an BitMapProducer.
- * @param bitMapProducer the BitMapProducer to merge from.
- * @throws IllegalArgumentException if producer sends illegal value.
- */
- private void merge(BitMapProducer bitMapProducer) {
+ @Override
+ public boolean merge(BitMapProducer bitMapProducer) {
+ Objects.requireNonNull(bitMapProducer, "bitMapProducer");
try {
int[] idx = new int[1];
bitMapProducer.forEachBitMap(value -> {
@@ -173,7 +116,7 @@ public final class SimpleBloomFilter implements BloomFilter {
int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits());
if (idxLimit < idx[0]) {
throw new IllegalArgumentException(String.format(
- "BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
+ "BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits() - 1));
}
if (idxLimit == idx[0]) {
long excess = (bitMap[idxLimit] >> shape.getNumberOfBits());
@@ -188,13 +131,13 @@ public final class SimpleBloomFilter implements BloomFilter {
throw new IllegalArgumentException(
String.format("BitMapProducer should send at most %s maps", bitMap.length), e);
}
+ return true;
}
@Override
public boolean merge(Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
- merge(hasher.indices(shape));
- return true;
+ return merge(hasher.indices(shape));
}
@Override
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
index 9359e6a39..fdea9b469 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
@@ -49,68 +49,6 @@ public final class SparseBloomFilter implements BloomFilter {
this.indices = new TreeSet<>();
}
- /**
- * Creates an instance that is equivalent to {@code other}.
- *
- * @param other The bloom filter to copy.
- */
- public SparseBloomFilter(BloomFilter other) {
- Objects.requireNonNull(other, "other");
- this.shape = other.getShape();
- this.indices = new TreeSet<>();
- if ((other.characteristics() & SPARSE) != 0) {
- merge((IndexProducer) other);
- } else {
- merge(IndexProducer.fromBitMapProducer(other));
- }
- }
-
- private void checkIndices(Shape shape) {
- if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
- throw new IllegalArgumentException(
- String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
- }
- }
-
- /**
- * Constructs a populated Bloom filter.
- * @param shape the shape for the bloom filter.
- * @param hasher the hasher to provide the initial data.
- */
- public SparseBloomFilter(final Shape shape, Hasher hasher) {
- this(shape);
- Objects.requireNonNull(hasher, "hasher");
- hasher.indices(shape).forEachIndex(this::add);
- checkIndices(shape);
- }
-
- /**
- * Constructs a populated Bloom filter.
- * @param shape the shape of the filter.
- * @param indices an index producer for the indices to to enable.
- * @throws IllegalArgumentException if indices contains a value greater than the number
- * of bits in the shape.
- */
- public SparseBloomFilter(Shape shape, IndexProducer indices) {
- this(shape);
- Objects.requireNonNull(indices, "indices");
- indices.forEachIndex(this::add);
- checkIndices(shape);
- }
-
- /**
- * Constructs a populated Bloom filter.
- * @param shape the shape of the filter.
- * @param bitMaps a BitMapProducer for the bit maps to add.
- * @throws IllegalArgumentException if the bit maps contain a value greater than the number
- * of bits in the shape.
- */
- public SparseBloomFilter(Shape shape, BitMapProducer bitMaps) {
- this(shape);
- Objects.requireNonNull(bitMaps, "bitMaps");
- merge(IndexProducer.fromBitMapProducer(bitMaps));
- }
-
private SparseBloomFilter(SparseBloomFilter source) {
shape = source.shape;
indices = new TreeSet<>(source.indices);
@@ -140,23 +78,27 @@ public final class SparseBloomFilter implements BloomFilter {
return true;
}
- /**
- * Performs a merge using an IndexProducer.
- * @param indexProducer the IndexProducer to merge from.
- * @throws IllegalArgumentException if producer sends illegal value.
- */
- private void merge(IndexProducer indexProducer) {
+ @Override
+ public boolean merge(IndexProducer indexProducer) {
+ Objects.requireNonNull(indexProducer, "indexProducer");
indexProducer.forEachIndex(this::add);
if (!this.indices.isEmpty()) {
if (this.indices.last() >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
- this.indices.last(), shape.getNumberOfBits()));
+ this.indices.last(), shape.getNumberOfBits() - 1));
}
if (this.indices.first() < 0) {
throw new IllegalArgumentException(
String.format("Value in list %s is less than 0", this.indices.first()));
}
}
+ return true;
+ }
+
+ @Override
+ public boolean merge(BitMapProducer bitMapProducer) {
+ Objects.requireNonNull(bitMapProducer, "bitMapProducer");
+ return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer));
}
@Override
@@ -213,7 +155,7 @@ public final class SparseBloomFilter implements BloomFilter {
* because our indices are always in order we can shorten the time necessary to
* create the longs for the consumer
*/
- // the currenlty constructed bitMap
+ // the currently constructed bitMap
long bitMap = 0;
// the bitmap we are working on
int idx = 0;
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
index c20725456..a4f5ce4f2 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
@@ -20,58 +20,57 @@
*
* <h2>Background:</h2>
*
- * <p>The Bloom filter is a probabilistic data structure that indicates where things are not.
- * Conceptually it is a bit vector. You create a Bloom filter by creating hashes
- * and converting those to enabled bits in the vector. Multiple Bloom filters may be merged
- * together into one Bloom filter. It is possible to test if a filter {@code B} has merged into
- * another filter {@code A} by verifying that {@code (A & B) == B}.</p>
- *
- * <p>Bloom filters are generally used where hash
- * tables would be too large, or as a filter front end for longer processes. For example
- * most browsers have a Bloom filter that is built from all known bad URLs (ones that
- * serve up malware). When you enter a URL the browser builds a Bloom filter and checks to
- * see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the
- * expensive lookup on a remote system is made to see if it actually is in the list. There
- * are lots of other uses, and in most cases the reason is to perform a fast check as a
- * gateway for a longer operation. </p>
+ * <p>The Bloom filter is a probabilistic data structure that indicates where things are not. Conceptually it is a bit
+ * vector. You create a Bloom filter by creating hashes and converting those to enabled bits in the vector. Multiple
+ * Bloom filters may be merged together into one Bloom filter. It is possible to test if a filter {@code B} has merged
+ * into another filter {@code A} by verifying that {@code (A & B) == B}.</p>
+ *
+ * <p>Bloom filters are generally used where hash tables would be too large, or as a filter front end for longer processes.
+ * For example most browsers have a Bloom filter that is built from all known bad URLs (ones that serve up malware).
+ * When you enter a URL the browser builds a Bloom filter and checks to see if it is "in" the bad URL filter. If not the
+ * URL is good, if it matches, then the expensive lookup on a remote system is made to see if it actually is in the
+ * list. There are lots of other uses, and in most cases the reason is to perform a fast check as a gateway for a longer
+ * operation.</p>
*
* <h3>BloomFilter</h3>
*
- * <p>The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted.
+ * <p>The Bloom filter architecture here is designed for speed of execution, so some methods like {@code merge}, {@code remove},
+ * {@code add}, and {@code subtract} may throw exceptions. Once an exception is thrown the state of the Bloom filter is unknown.
+ * The choice to use not use atomic transactions was made to achieve maximum performance under correct usage.</p>
+ *
+ * <p>In addition the architecture is designed so that the implementation of the storage of bits is abstracted.
* Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
- * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
+ * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
* manipulation of the representations.</p>
*
- * <p>The bloom filter code is an interface that requires implementation of 6 methods:</p>
+ * <p>The bloom filter code is an interface that requires implementation of 9 methods:</p>
* <ul>
- * <li>{@code cardinality()}
- * returns the number of bits enabled in the Bloom filter.</li>
+ * <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li>
*
- * <li>{@code contains(BitMapProducer)} which
- * returns true if the bits specified by the bit maps generated by the BitMapProducer are enabled in the Bloom filter.</li>
+ * <li>{@link BloomFilter#characteristics()} which returns a integer of characteristics flags.</li>
*
- * <li>{@code contains(IndexProducer)} which
- * returns true if the bits specified by the indices generated by IndexProducer are enabled in the Bloom filter.</li>
+ * <li>{@link BloomFilter#clear()} which resets the Bloomfilter to its initial empty state.</li>
*
- * <li>{@code getShape()} which
- * returns the shape the Bloom filter was created with.</li>
-
- * <li>{@code isSparse()} which
- * returns true if an the implementation tracks indices natively, false if bit maps are used. In cases where
- * neither are used the {@code isSparse} return value should reflect which is faster to produce.</li>
+ * <li>{@link BloomFilter#contains(IndexProducer)} which returns true if the bits specified by the indices generated by
+ * IndexProducer are enabled in the Bloom filter.</li>
+ *
+ * <li>{@link BloomFilter#copy()} which returns a fresh copy of the bitmap.</li>
+ *
+ * <li>{@link BloomFilter#getShape()} which returns the shape the Bloom filter was created with.</li>
+ *
+ * <li>{@link BloomFilter#merge(BitMapProducer)} which merges the BitMaps from the BitMapProducer into the internal
+ * representation of the Bloom filter.</li>
*
- * <li>{@code mergeInPlace(BloomFilter)} which
- * utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits
- * in the internal representation of the Bloom filter.</li>
+ * <li>{@link BloomFilter#merge(IndexProducer)} which merges the indices from the IndexProducer into the internal
+ * representation of the Bloom filter.</li>
* </ul>
*
- * <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.
- * </p>
+ * <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.</p>
*
* <h3>CountingBloomFilter</h3>
*
* <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
- * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
+ * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
* overhead.</p>
*
* <h3>Shape</h3>
@@ -80,22 +79,23 @@
*
* <h3>Hasher</h3>
*
- * <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton,
- * each hasher represents one item being added to the Bloom filter. The HasherCollection represents the
- * number of items as the sum of the number of items represented by the Hashers in the collection.</p>
+ * <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton,
+ * each hasher represents one item being added to the Bloom filter. The HasherCollection represents the number of
+ * items as the sum of the number of items represented by the Hashers in the collection.</p>
*
- * <p>The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily
- * initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and
- * then splitting the hash bytes in half and considering each as a long value.</p>
+ * <p>The EnhancedDoubleHasher uses a combinatorial generation technique to create the integers. It is easily
+ * initialized by using a byte array returned by the standard {@code MessageDigest} or other hash function to
+ * initialize the Hasher. Alternatively a pair of a long values may also be used.</p>
*
- * <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
- * and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices.</p>
+ * <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
+ * and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices when implementing
+ * {@code Hasher.uniqueIndices(Shape)}.</p>
*
* <h2>References</h2>
*
* <ol>
- * <li> https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
- * <li> https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
+ * <li>https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li>
+ * <li>https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li>
* </ol>
*
* @since 4.5
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
index da2aa897c..91eb763af 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
@@ -24,6 +25,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.List;
+import java.util.SortedSet;
+
import org.junit.jupiter.api.Test;
/**
@@ -70,7 +73,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* @param hasher the hasher to use to create the filter.
* @return a BloomFilter implementation.
*/
- protected abstract T createFilter(Shape shape, Hasher hasher);
+ protected final T createFilter(Shape shape, Hasher hasher) {
+ T bf = createEmptyFilter(shape);
+ bf.merge(hasher);
+ return bf;
+ }
/**
* Create the BloomFilter implementation we are testing.
@@ -79,7 +86,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* @param producer A BitMap producer to build the filter with.
* @return a BloomFilter implementation.
*/
- protected abstract T createFilter(Shape shape, BitMapProducer producer);
+ protected final T createFilter(Shape shape, BitMapProducer producer) {
+ T bf = createEmptyFilter(shape);
+ bf.merge(producer);
+ return bf;
+ }
/**
* Create the BloomFilter implementation we are testing.
@@ -88,57 +99,85 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* @param producer An Index producer to build the filter with.
* @return a BloomFilter implementation.
*/
- protected abstract T createFilter(Shape shape, IndexProducer producer);
+ protected final T createFilter(Shape shape, IndexProducer producer) {
+ T bf = createEmptyFilter(shape);
+ bf.merge(producer);
+ return bf;
+ }
/**
*
*/
@Test
- public void testConstructWithBadHasher() {
+ public void testMergeWithBadHasher() {
// value too large
+ final BloomFilter f = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class,
- () -> createFilter(getTestShape(), new BadHasher(getTestShape().getNumberOfBits())));
+ () -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
// negative value
- assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), new BadHasher(-1)));
+ BloomFilter f2 = createEmptyFilter(getTestShape());
+ assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
}
@Test
- public void testConstructWitBitMapProducer() {
- long[] values = { from11Value, 0x9L };
- BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
- List<Long> lst = new ArrayList<>();
- for (long l : values) {
- lst.add(l);
+ public void testMergeWithHasher() {
+ for (int i=0; i<5000; i++) {
+ final BloomFilter f = createEmptyFilter(getTestShape());
+ int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
+ Hasher hasher = new ArrayHasher(expected);
+ f.merge(hasher);
+ // create sorted unique array of expected values
+ assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray( ));
}
- assertTrue(f.forEachBitMap(l -> {
- return lst.remove(Long.valueOf(l));
- }));
- assertTrue(lst.isEmpty());
+ }
- BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
+ @Test
+ public void testMergeWithBitMapProducer() {
+ for (int i=0; i<5000; i++) {
+ long[] values = new long[2];
+ for (int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
+ BitMap.set(values, idx);
+ }
+ BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
+ List<Long> lst = new ArrayList<>();
+ for (long l : values) {
+ lst.add(l);
+ }
+ assertTrue(f.forEachBitMap(l -> {
+ return lst.remove(Long.valueOf(l));
+ }));
+ assertTrue(lst.isEmpty());
+ }
// values too large
- assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), badProducer));
+ final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
+ final BloomFilter bf = createEmptyFilter(getTestShape());
+ assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
+
+ // test where merged bits exceed expected bits but both bitmaps are the same length.
+ final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
+ final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
+ assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2));
}
@Test
- public void testConstructWithIndexProducer() {
- int[] values = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 };
- BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
- List<Integer> lst = new ArrayList<>();
- for (int i : values) {
- lst.add(i);
+ public void testMergeWithIndexProducer() {
+ for (int i=0; i<5000; i++) {
+ int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
+ BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
+ SortedSet<Integer> uniqueValues = DefaultIndexProducerTest.uniqueSet(values);
+ assertTrue(f.forEachIndex(idx -> {
+ return uniqueValues.remove(Integer.valueOf(idx));
+ }));
+ assertTrue(uniqueValues.isEmpty());
}
- assertTrue(f.forEachIndex(i -> {
- return lst.remove(Integer.valueOf(i));
- }));
- assertTrue(lst.isEmpty());
-
// value to large
- assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(),
- IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
+ final BloomFilter f1 = createEmptyFilter(getTestShape());
+ assertThrows(IllegalArgumentException.class,
+ () -> f1.merge(IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() })));
// negative value
+ final BloomFilter f2 = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class,
- () -> createFilter(getTestShape(), IndexProducer.fromIndexArray(new int[] { -1 })));
+ () -> f2.merge(IndexProducer.fromIndexArray(new int[] { -1 })));
}
@Test
@@ -228,7 +267,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
@Test
public final void testEstimateN() {
// build a filter
- BloomFilter filter1 = new SimpleBloomFilter(getTestShape(), from1);
+ BloomFilter filter1 = createFilter(getTestShape(), from1);
assertEquals(1, filter1.estimateN());
// the data provided above do not generate an estimate that is equivalent to the
@@ -316,20 +355,20 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
// test error when bloom filter returns values out of range
- final BloomFilter bf5 = new SimpleBloomFilter(
- Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
- new IncrementingHasher(Long.SIZE * 2, 1));
+ BloomFilter bf5 = new SimpleBloomFilter(
+ Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
+ bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
- final BloomFilter bf6 = new SparseBloomFilter(
- Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE),
- new IncrementingHasher(Long.SIZE * 2, 1));
+ BloomFilter bf6 = new SparseBloomFilter(
+ Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
+ bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
}
- private static void assertIndexProducerConstructor(Shape shape, int[] values, int[] expected) {
+ private void assertIndexProducerMerge(Shape shape, int[] values, int[] expected) {
IndexProducer indices = IndexProducer.fromIndexArray(values);
- SparseBloomFilter filter = new SparseBloomFilter(shape, indices);
+ BloomFilter filter = createFilter(shape, indices);
List<Integer> lst = new ArrayList<>();
filter.forEachIndex(x -> {
lst.add(x);
@@ -347,18 +386,18 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
}
@Test
- public void testIndexProducerConstructor() {
+ public void testIndexProducerMerge() {
Shape shape = Shape.fromKM(5, 10);
- assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
+ assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 });
// test duplicate values
- assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
+ assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 });
// test negative values
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 });
// test index too large
assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 });
// test no indices
- assertIndexProducerConstructor(shape, new int[0], new int[0]);
+ assertIndexProducerMerge(shape, new int[0], new int[0]);
}
@Test
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
index b7ca7dd37..48fbd92f4 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
@@ -17,6 +17,7 @@
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -94,7 +95,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
@Test
public final void testCountingBloomFilterSpecificContains() {
- final BloomFilter bf = new SimpleBloomFilter(getTestShape(), from1);
+ final BloomFilter bf = new SimpleBloomFilter(getTestShape());
+ bf.merge(from1);
final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher);
assertTrue(bf.contains(bf), "BF Should contain itself");
@@ -112,7 +114,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
public final void testCountingSpecificMerge() {
final BloomFilter bf1 = createFilter(getTestShape(), from1);
- final BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from11);
+ final BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
+ bf2.merge(from11);
final BloomFilter bf3 = bf1.copy();
bf3.merge(bf2);
@@ -133,7 +136,9 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
assertTrue(bf5.isValid(), "Should be valid");
CountingBloomFilter bf6 = bf5.copy();
- bf6.merge(new SimpleBloomFilter(getTestShape(), from1));
+ BloomFilter bf7 = new SimpleBloomFilter(getTestShape());
+ bf7.merge(from1);
+ bf6.merge(bf7);
assertFalse(bf6.isValid(), "Should not be valid");
}
@@ -195,10 +200,13 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
*/
@Test
public final void testRemove() {
+ BloomFilter simple = new SimpleBloomFilter(getTestShape());
+ simple.merge(from11);
+
final CountingBloomFilter bf1 = createFilter(getTestShape(), from1);
bf1.add(BitCountProducer.from(from11.indices(getTestShape())));
- assertTrue(bf1.remove(new SimpleBloomFilter(getTestShape(), from11)), "Remove should work");
+ assertTrue(bf1.remove(simple), "Remove should work");
assertFalse(bf1.contains(from11), "Should not contain");
assertTrue(bf1.contains(from1), "Should contain");
@@ -215,17 +223,45 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
assertCounts(bf2, from1Counts);
// test underflow
-
final CountingBloomFilter bf3 = createFilter(getTestShape(), from1);
-
- final BloomFilter bf4 = new SimpleBloomFilter(getTestShape(), from11);
-
- assertFalse(bf3.remove(bf4), "Subtract should not work");
+ assertFalse(bf3.remove(simple), "Subtract should not work");
assertFalse(bf3.isValid(), "isValid should return false");
assertFalse(bf3.contains(from1), "Should not contain");
- assertFalse(bf3.contains(bf4), "Should not contain");
+ assertFalse(bf3.contains(simple), "Should not contain");
assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
+
+ // with IndexProducer
+ IndexProducer ip = from11.indices(getTestShape());
+
+ final CountingBloomFilter bf4 = createFilter(getTestShape(), from1);
+ bf4.add(BitCountProducer.from(from11.indices(getTestShape())));
+
+ assertTrue(bf4.remove(ip), "Remove should work");
+ assertFalse(bf4.contains(from11), "Should not contain");
+ assertTrue(bf4.contains(from1), "Should contain");
+
+ assertCounts(bf4, from1Counts);
+
+ // with BitMapProducer
+ final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits());
+ final CountingBloomFilter bf5 = createFilter(getTestShape(), from1);
+ bf5.add(BitCountProducer.from(from11.indices(getTestShape())));
+
+ assertTrue(bf5.remove(bmp), "Remove should work");
+ assertFalse(bf5.contains(from11), "Should not contain");
+ assertTrue(bf5.contains(from1), "Should contain");
+
+ assertCounts(bf5, from1Counts);
+
+ // test producer errors
+ IndexProducer ip2 = IndexProducer.fromIndexArray(1, 2, getTestShape().getNumberOfBits());
+ final CountingBloomFilter bf6 = createFilter(getTestShape(), from1);
+ assertThrows( IllegalArgumentException.class, () -> bf6.remove(ip2));
+
+ final CountingBloomFilter bf7 = createFilter(getTestShape(), from1);
+ final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits());
+ assertThrows( IllegalArgumentException.class, () -> bf7.remove(bmp2));
}
@Test
@@ -247,20 +283,12 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil
bf1.merge(hasher);
assertEquals(6, bf1.cardinality());
bf1.forEachCount((x, y) -> {
- assertEquals(1, y, "Hasher in mergeInPlace results in value not equal to 1");
- return true;
- });
-
- bf1 = createEmptyFilter(shape);
- CountingBloomFilter bf2 = bf1.copy();
- bf2.merge(hasher);
- assertEquals(6, bf2.cardinality());
- bf2.forEachCount((x, y) -> {
assertEquals(1, y, "Hasher in merge results in value not equal to 1");
return true;
});
- bf1 = createFilter(shape, hasher);
+ bf1 = createEmptyFilter(shape);
+ bf1.merge(hasher);
bf1.remove(hasher);
assertEquals(0, bf1.cardinality());
assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0");
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java
index cd5eda18c..0e9fae410 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java
@@ -85,7 +85,7 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
count[0]++;
return false;
});
- assertEquals(1, count[0], "did not exit early" );
+ assertEquals(1, count[0], "did not exit early");
}
@Test
@@ -97,8 +97,8 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest {
List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
producer = hasher.uniqueIndices(shape);
List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList());
- assertTrue( full.size() > unique.size() );
- Set<Integer> set = new HashSet<Integer>( unique );
- assertEquals( set.size(), unique.size() );
+ assertTrue(full.size() > unique.size());
+ Set<Integer> set = new HashSet<>(unique);
+ assertEquals(set.size(), unique.size());
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
index 86bd638b7..e7141dade 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
@@ -25,28 +25,4 @@ public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTes
protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
return new ArrayCountingBloomFilter(shape);
}
-
- @Override
- protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) {
- return createFilter(shape, hasher.uniqueIndices(shape));
- }
-
- @Override
- protected ArrayCountingBloomFilter createFilter(Shape shape, BitMapProducer producer) {
- return createFilter(shape, IndexProducer.fromBitMapProducer(producer));
- }
-
- @Override
- protected ArrayCountingBloomFilter createFilter(Shape shape, IndexProducer producer) {
- ArrayCountingBloomFilter filter = createEmptyFilter(shape);
- try {
- filter.add(BitCountProducer.from(producer));
- return filter;
- } catch (ArrayIndexOutOfBoundsException e) {
- // since ArrayCountingBloomFilter does not ahave a constructor that takes a
- // hasher
- // we have to duplicate the expected results here.
- throw new IllegalArgumentException(e);
- }
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java
similarity index 51%
copy from src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
copy to src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java
index e7af149b1..babd13d2c 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java
@@ -16,36 +16,46 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import java.util.function.LongPredicate;
+import java.util.Objects;
+import java.util.function.IntPredicate;
-public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
+/**
+ * A Testing Hasher that returns the array values % shape.getNumberOfBits().
+ *
+ * @since 4.5
+ */
+public final class ArrayHasher implements Hasher {
+ final int[] values;
- @Override
- protected BitMapProducer createProducer() {
- return new DefaultBitMapProducer(new long[] { 1L, 2L });
+ ArrayHasher(final int... values) {
+ this.values = values;
}
@Override
- protected BitMapProducer createEmptyProducer() {
- return new DefaultBitMapProducer(new long[0]);
+ public IndexProducer indices(final Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ return new Producer(shape);
}
@Override
- protected boolean emptyIsZeroLength() {
- return true;
+ public IndexProducer uniqueIndices(Shape shape) {
+ return new Producer(shape);
}
- class DefaultBitMapProducer implements BitMapProducer {
- long[] bitMaps;
+ private class Producer implements IndexProducer {
+ Shape shape;
- DefaultBitMapProducer(long[] bitMaps) {
- this.bitMaps = bitMaps;
+ Producer(Shape shape) {
+ this.shape = shape;
}
@Override
- public boolean forEachBitMap(LongPredicate predicate) {
- for (long bitmap : bitMaps) {
- if (!predicate.test(bitmap)) {
+ public boolean forEachIndex(IntPredicate consumer) {
+ int pos = 0;
+ for (int i=0; i<shape.getNumberOfHashFunctions(); i++) {
+ int result = values[pos++] % shape.getNumberOfBits();
+ pos = pos % values.length;
+ if (!consumer.test(result)) {
return false;
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java
index aa000797e..96f121f0a 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java
@@ -23,7 +23,9 @@ public class BitMapProducerFromSimpleBloomFilterTest extends AbstractBitMapProdu
@Override
protected BitMapProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
- return new SimpleBloomFilter(shape, hasher);
+ BloomFilter bf = new SimpleBloomFilter(shape);
+ bf.merge(hasher);
+ return bf;
}
@Override
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java
index 0c80b6d0d..7d22dee12 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java
@@ -23,7 +23,9 @@ public class BitMapProducerFromSparseBloomFilterTest extends AbstractBitMapProdu
@Override
protected BitMapProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
- return new SparseBloomFilter(shape, hasher);
+ BloomFilter bf = new SparseBloomFilter(shape);
+ bf.merge(hasher);
+ return bf;
}
@Override
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
index e7af149b1..c14f15212 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java
@@ -16,13 +16,21 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Random;
import java.util.function.LongPredicate;
+import org.junit.jupiter.api.Test;
+
public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
+ long[] values = generateLongArray( 5 );
+
@Override
protected BitMapProducer createProducer() {
- return new DefaultBitMapProducer(new long[] { 1L, 2L });
+ return new DefaultBitMapProducer(values);
}
@Override
@@ -52,4 +60,36 @@ public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest {
return true;
}
}
+
+ /**
+ * Generates an array of random long values.
+ * @param size the number of values to generate
+ * @return the array of random values.
+ */
+ public static long[] generateLongArray( int size ) {
+ Random rnd = new Random();
+ long[] expected = new long[size];
+ for (int i=0; i<size; i++) {
+ expected[i] = rnd.nextLong();
+ }
+ return expected;
+ }
+
+ @Test
+ public void testFromIndexProducer() {
+ int[] expected = DefaultIndexProducerTest.generateIntArray(10, 256);
+ IndexProducer ip = IndexProducer.fromIndexArray(expected);
+ long[] ary = BitMapProducer.fromIndexProducer(ip, 256).asBitMapArray();
+ for (int idx : expected) {
+ assertTrue( BitMap.contains(ary, idx));
+ }
+ }
+
+ @Test
+ public void testFromBitMapArray() {
+ int nOfBitMaps = BitMap.numberOfBitMaps(256);
+ long[] expected = generateLongArray( nOfBitMaps );
+ long[] ary = BitMapProducer.fromBitMapArray(expected).asBitMapArray();
+ assertArrayEquals( expected, ary );
+ }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
index ea7bd25f6..eb23d84a3 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
@@ -34,21 +34,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
return new SparseDefaultBloomFilter(shape);
}
- @Override
- protected AbstractDefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) {
- return new SparseDefaultBloomFilter(shape, hasher);
- }
-
- @Override
- protected AbstractDefaultBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
- return new SparseDefaultBloomFilter(shape, producer);
- }
-
- @Override
- protected AbstractDefaultBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
- return new SparseDefaultBloomFilter(shape, producer);
- }
-
@Test
public void testDefaultBloomFilterSimpleSpecificMerge() {
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
@@ -62,14 +47,14 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
public void testDefaultBloomFilterSparseSpecificMerge() {
Shape shape = Shape.fromKM(3, 150);
AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
- AbstractDefaultBloomFilter filter2 = new SparseDefaultBloomFilter(shape, new IncrementingHasher(0, 1));
+ AbstractDefaultBloomFilter filter2 = createFilter(shape, new IncrementingHasher(0, 1));
BloomFilter newFilter = filter.copy();
newFilter.merge(filter2);
assertEquals(3, newFilter.cardinality());
}
@Test
- public void testHasherBasedMergeInPlaceWithDifferingSparseness() {
+ public void testHasherBasedMergeWithDifferingSparseness() {
Hasher hasher = new IncrementingHasher(1, 1);
BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
@@ -92,26 +77,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
this.indices = new TreeSet<>();
}
- AbstractDefaultBloomFilter(Shape shape, Hasher hasher) {
- this(shape, hasher.indices(shape));
- }
-
- AbstractDefaultBloomFilter(Shape shape, BitMapProducer producer) {
- this(shape, IndexProducer.fromBitMapProducer(producer));
- }
-
- AbstractDefaultBloomFilter(Shape shape, IndexProducer producer) {
- this(shape);
- producer.forEachIndex((i) -> {
- indices.add(i);
- return true;
- });
- if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) {
- throw new IllegalArgumentException(
- String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits()));
- }
- }
-
@Override
public void clear() {
indices.clear();
@@ -147,12 +112,7 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
}
- @Override
- public boolean merge(BloomFilter other) {
- other.forEachIndex((i) -> {
- indices.add(i);
- return true;
- });
+ private void checkIndicesRange() {
if (!indices.isEmpty()) {
if (indices.last() >= shape.getNumberOfBits()) {
throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
@@ -163,28 +123,30 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
String.format("Value in list %s is less than 0", indices.first()));
}
}
- return true;
}
@Override
- public int cardinality() {
- return indices.size();
+ public boolean merge(IndexProducer indexProducer) {
+ boolean result = indexProducer.forEachIndex(x -> {
+ indices.add(x);
+ return true;
+ });
+ checkIndicesRange();
+ return result;
}
- }
- static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
-
- SparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
- super(shape, producer);
+ @Override
+ public boolean merge(BitMapProducer bitMapProducer){
+ return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
}
- SparseDefaultBloomFilter(Shape shape, Hasher hasher) {
- super(shape, hasher);
+ @Override
+ public int cardinality() {
+ return indices.size();
}
+ }
- SparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
- super(shape, producer);
- }
+ static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
SparseDefaultBloomFilter(Shape shape) {
super(shape);
@@ -205,18 +167,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom
static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
- NonSparseDefaultBloomFilter(Shape shape, BitMapProducer producer) {
- super(shape, producer);
- }
-
- NonSparseDefaultBloomFilter(Shape shape, Hasher hasher) {
- super(shape, hasher);
- }
-
- NonSparseDefaultBloomFilter(Shape shape, IndexProducer producer) {
- super(shape, producer);
- }
-
NonSparseDefaultBloomFilter(Shape shape) {
super(shape);
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java
new file mode 100644
index 000000000..a36f545d0
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import java.util.Random;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.function.IntPredicate;
+
+import org.junit.jupiter.api.Test;
+
+public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
+
+ private int[] values = generateIntArray( 10, 512 );
+
+ @Override
+ protected IndexProducer createProducer() {
+ return IndexProducer.fromIndexArray( values );
+ }
+
+ @Override
+ protected IndexProducer createEmptyProducer() {
+ return new IndexProducer() {
+
+ @Override
+ public boolean forEachIndex(IntPredicate predicate) {
+ return true;
+ }
+ };
+ }
+
+ /**
+ * Generates an array of integers.
+ * @param size the size of the array
+ * @param bound the upper bound (exclusive) of the values in the array.
+ * @return an array of int.
+ */
+ public static int[] generateIntArray( int size, int bound ) {
+ Random rnd = new Random();
+ int[] expected = new int[size];
+ for (int i=0; i<size; i++) {
+ expected[i] = rnd.nextInt(bound);
+ }
+ return expected;
+ }
+
+ /**
+ * Creates a sorted set of Integers.
+ * @param ary the array to sort and make unique
+ * @return the sorted Set.
+ */
+ public static SortedSet<Integer> uniqueSet(int[] ary) {
+ SortedSet<Integer> uniq = new TreeSet<Integer>();
+ for (int idx : ary) {
+ uniq.add(idx);
+ }
+ return uniq;
+ }
+
+ /**
+ * Creates a sorted unique array of ints.
+ * @param ary the array to sort and make unique
+ * @return the sorted unique array.
+ */
+ public static int[] unique(int[] ary) {
+ Set<Integer> uniq = uniqueSet(ary);
+ int[] result = new int[uniq.size()];
+ int i=0;
+ for (int idx : uniq) {
+ result[i++] = idx;
+ }
+ return result;
+ }
+
+ @Test
+ public void testFromBitMapProducer() {
+ for (int i=0; i<5000; i++) {
+ int[] expected = generateIntArray( 7, 256 );
+ long[] bits = new long[BitMap.numberOfBitMaps(256)];
+ for (int bitIndex : expected) {
+ BitMap.set(bits, bitIndex);
+ }
+ IndexProducer ip = IndexProducer.fromBitMapProducer(BitMapProducer.fromBitMapArray(bits));
+ assertArrayEquals(unique(expected), ip.asIndexArray());
+ }
+ }
+
+ @Test
+ public void testFromIndexArray() {
+ for (int i=0; i<5000; i++) {
+ int[] expected = generateIntArray(10, 256);
+ IndexProducer ip = IndexProducer.fromIndexArray(expected);
+ assertArrayEquals(unique(expected), ip.asIndexArray());
+ }
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
index d62ac959e..40fded6dd 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
@@ -23,11 +23,13 @@ public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProduce
@Override
protected IndexProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
- return new SparseBloomFilter(shape, hasher);
+ BloomFilter bf = new SimpleBloomFilter(shape);
+ bf.merge(hasher);
+ return bf;
}
@Override
protected IndexProducer createEmptyProducer() {
- return new SparseBloomFilter(shape);
+ return new SimpleBloomFilter(shape);
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
index b51e01b40..bd1e34836 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
@@ -23,11 +23,14 @@ public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProduce
@Override
protected IndexProducer createProducer() {
Hasher hasher = new IncrementingHasher(0, 1);
- return new SimpleBloomFilter(shape, hasher);
+ BloomFilter bf = new SparseBloomFilter(shape);
+ bf.merge(hasher);
+ return bf;
+
}
@Override
protected IndexProducer createEmptyProducer() {
- return new SimpleBloomFilter(shape);
+ return new SparseBloomFilter(shape);
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
index 890e22f8f..98a62604f 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
@@ -49,22 +49,34 @@ public class SetOperationsTest {
assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
}
+ private BloomFilter createFilter(Shape shape, Hasher hasher) {
+ BloomFilter bf = new SimpleBloomFilter(shape);
+ bf.merge(hasher);
+ return bf;
+ }
+
+ private BloomFilter createFilter(Shape shape, IndexProducer producer) {
+ BloomFilter bf = new SparseBloomFilter(shape);
+ bf.merge(producer);
+ return bf;
+ }
+
/**
* Tests that the Cosine similarity is correctly calculated.
*/
@Test
public final void testCosineDistance() {
- BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
- BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter1 = createFilter(shape, from1);
+ BloomFilter filter2 = createFilter(shape, from1);
// identical filters should have no distance.
double expected = 0;
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
Shape shape2 = Shape.fromKM(2, 72);
- filter1 = new SimpleBloomFilter(shape2, from1);
- filter2 = new SimpleBloomFilter(shape2, new IncrementingHasher(2, 1));
+ filter1 = createFilter(shape2, from1);
+ filter2 = createFilter(shape2, new IncrementingHasher(2, 1));
int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
int cardinalityA = 2;
@@ -72,8 +84,8 @@ public class SetOperationsTest {
expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB));
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
- filter1 = new SimpleBloomFilter(shape, from1);
- filter2 = new SimpleBloomFilter(shape, from11);
+ filter1 = createFilter(shape, from1);
+ filter2 = createFilter(shape, from11);
dotProduct = /* [1..17] & [11..27] = [] = */ 7;
cardinalityA = 17;
cardinalityB = 17;
@@ -81,20 +93,19 @@ public class SetOperationsTest {
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
// test with no values
- filter1 = new SimpleBloomFilter(shape, from1);
+ filter1 = createFilter(shape, from1);
filter2 = new SimpleBloomFilter(shape);
- BloomFilter filter3 = new SimpleBloomFilter(shape);
dotProduct = /* [1,2] & [] = [] = */ 0;
cardinalityA = 2;
cardinalityB = 0;
- expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
+ expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
dotProduct = /* [] & [] = [] = */ 0;
cardinalityA = 0;
cardinalityB = 0;
- expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0;
+ expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
}
@@ -103,27 +114,27 @@ public class SetOperationsTest {
*/
@Test
public final void testCosineSimilarity() {
- BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
- BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter1 = createFilter(shape, from1);
+ BloomFilter filter2 = createFilter(shape, from1);
int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
int cardinalityA = 17;
int cardinalityB = 17;
- double expected = /* dotProduct/Sqrt( cardinalityA * cardinalityB ) = */ 1.0;
+ double expected = /* dotProduct/Sqrt(cardinalityA * cardinalityB) = */ 1.0;
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
cardinalityA = 17;
cardinalityB = 17;
expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
- filter2 = new SimpleBloomFilter(shape, from11);
+ filter2 = createFilter(shape, from11);
assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
// test no values
filter1 = new SimpleBloomFilter(shape);
filter2 = new SimpleBloomFilter(shape);
// build a filter
- BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter3 = createFilter(shape, from1);
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
}
@@ -133,13 +144,13 @@ public class SetOperationsTest {
*/
@Test
public final void testHammingDistance() {
- final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
- BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+ final BloomFilter filter1 = createFilter(shape, from1);
+ BloomFilter filter2 = createFilter(shape, from1);
int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
- filter2 = new SimpleBloomFilter(shape, from11);
+ filter2 = createFilter(shape, from11);
hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20;
assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
}
@@ -149,13 +160,13 @@ public class SetOperationsTest {
*/
@Test
public final void testJaccardDistance() {
- BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
- BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter1 = createFilter(shape, from1);
+ BloomFilter filter2 = createFilter(shape, from1);
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
- filter2 = new SimpleBloomFilter(shape, from11);
+ filter2 = createFilter(shape, from11);
double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
int union = /* [1..17] | [11..27] = [1..27] = */ 27;
double expected = 1 - (intersection / union);
@@ -164,7 +175,7 @@ public class SetOperationsTest {
// test no values
filter1 = new SimpleBloomFilter(shape);
filter2 = new SimpleBloomFilter(shape);
- BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter3 = createFilter(shape, from1);
// 1 - jaccardSimilarity -- see jaccardSimilarityTest
assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
@@ -176,15 +187,15 @@ public class SetOperationsTest {
*/
@Test
public final void testJaccardSimilarity() {
- BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
- BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter1 = createFilter(shape, from1);
+ BloomFilter filter2 = createFilter(shape, from1);
double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
int union = /* [1..17] | [1..17] = [1..17] = */ 17;
double expected = intersection / union;
assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
- filter2 = new SimpleBloomFilter(shape, from11);
+ filter2 = createFilter(shape, from11);
intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
union = /* [1..17] | [11..27] = [1..27] = */ 27;
expected = intersection / union;
@@ -193,7 +204,6 @@ public class SetOperationsTest {
// test no values
filter1 = new SimpleBloomFilter(shape);
filter2 = new SimpleBloomFilter(shape);
- BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
intersection = /* [] & [1..17] = [] = */ 0.0;
@@ -205,16 +215,16 @@ public class SetOperationsTest {
@Test
public final void testOrCardinality() {
Shape shape = Shape.fromKM(3, 128);
- SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
- SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
}
@@ -222,32 +232,32 @@ public class SetOperationsTest {
public final void testOrCardinalityWithDifferentLengthFilters() {
Shape shape = Shape.fromKM(3, 128);
Shape shape2 = Shape.fromKM(3, 192);
- SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
- SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
- filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
- filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
}
@Test
public final void testAndCardinality() {
Shape shape = Shape.fromKM(3, 128);
- SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
- SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
}
@@ -255,37 +265,37 @@ public class SetOperationsTest {
public final void testAndCardinalityWithDifferentLengthFilters() {
Shape shape = Shape.fromKM(3, 128);
Shape shape2 = Shape.fromKM(3, 192);
- SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
- SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
- filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
- filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
}
@Test
public final void testXorCardinality() {
Shape shape = Shape.fromKM(3, 128);
- SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
- SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 }));
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
Shape bigShape = Shape.fromKM(3, 192);
- filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
- filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
+ filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185}));
+ filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 }));
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
}
@@ -294,16 +304,16 @@ public class SetOperationsTest {
Shape shape = Shape.fromKM(3, 128);
Shape shape2 = Shape.fromKM(3, 192);
- SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
- SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 }));
+ BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
- filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 }));
+ filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
- filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
- filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
+ filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 }));
+ filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 }));
assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
index 15aeea62c..b37c0ffe8 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
@@ -16,7 +16,6 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.junit.jupiter.api.Test;
/**
* Tests for the {@link SimpleBloomFilter}.
@@ -26,63 +25,4 @@ public class SimpleBloomFilterTest extends AbstractBloomFilterTest<SimpleBloomFi
protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
return new SimpleBloomFilter(shape);
}
-
- @Override
- protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) {
- return new SimpleBloomFilter(shape, hasher);
- }
-
- @Override
- protected SimpleBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
- return new SimpleBloomFilter(shape, producer);
- }
-
- @Override
- protected SimpleBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
- return new SimpleBloomFilter(shape, producer);
- }
-
- private void executeNestedTest(SimpleBloomFilterTest nestedTest) {
- nestedTest.testAsBitMapArray();
- nestedTest.testContains();
- nestedTest.testEstimateIntersection();
- nestedTest.testEstimateN();
- nestedTest.testEstimateUnion();
- nestedTest.testIsFull();
- nestedTest.testMerge();
- }
-
- @Test
- public void testConstructors() {
-
- // // copy of Sparse
- SimpleBloomFilterTest nestedTest = new SimpleBloomFilterTest() {
-
- @Override
- protected SimpleBloomFilter createEmptyFilter(Shape shape) {
- return new SimpleBloomFilter(new SparseBloomFilter(shape));
- }
-
- @Override
- protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
- return new SimpleBloomFilter(new SparseBloomFilter(shape, hasher));
- }
- };
- executeNestedTest(nestedTest);
-
- // copy of Simple
- nestedTest = new SimpleBloomFilterTest() {
-
- @Override
- protected SimpleBloomFilter createEmptyFilter(Shape shape) {
- return new SimpleBloomFilter(new SimpleBloomFilter(shape));
- }
-
- @Override
- protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) {
- return new SimpleBloomFilter(new SimpleBloomFilter(shape, hasher));
- }
- };
- executeNestedTest(nestedTest);
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
index d3645f1e8..95484d967 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
@@ -31,64 +31,6 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
return new SparseBloomFilter(shape);
}
- @Override
- protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) {
- return new SparseBloomFilter(shape, hasher);
- }
-
- @Override
- protected SparseBloomFilter createFilter(final Shape shape, final BitMapProducer producer) {
- return new SparseBloomFilter(shape, producer);
- }
-
- @Override
- protected SparseBloomFilter createFilter(final Shape shape, final IndexProducer producer) {
- return new SparseBloomFilter(shape, producer);
- }
-
- private void executeNestedTest(SparseBloomFilterTest nestedTest) {
- nestedTest.testContains();
- nestedTest.testEstimateIntersection();
- nestedTest.testEstimateN();
- nestedTest.testEstimateUnion();
- nestedTest.testIsFull();
- nestedTest.testMerge();
- }
-
- @Test
- public void testConstructors() {
-
- // copy of Sparse
- SparseBloomFilterTest nestedTest = new SparseBloomFilterTest() {
-
- @Override
- protected SparseBloomFilter createEmptyFilter(Shape shape) {
- return new SparseBloomFilter(new SparseBloomFilter(shape));
- }
-
- @Override
- protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
- return new SparseBloomFilter(new SparseBloomFilter(shape, hasher));
- }
- };
- executeNestedTest(nestedTest);
-
- // copy of Simple
- nestedTest = new SparseBloomFilterTest() {
-
- @Override
- protected SparseBloomFilter createEmptyFilter(Shape shape) {
- return new SparseBloomFilter(new SimpleBloomFilter(shape));
- }
-
- @Override
- protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) {
- return new SparseBloomFilter(new SimpleBloomFilter(shape, hasher));
- }
- };
- executeNestedTest(nestedTest);
- }
-
@Test
public void testBitMapProducerEdgeCases() {
int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 };
@@ -140,9 +82,10 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi
}
@Test
- public void testBloomFilterBasedMergeInPlaceEdgeCases() {
+ public void testBloomFilterBasedMergeEdgeCases() {
BloomFilter bf1 = createEmptyFilter(getTestShape());
- BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from1);
+ BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
+ bf2.merge(from1);
bf1.merge(bf2);
assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y));
}