You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2020/02/16 20:16:09 UTC
[commons-collections] 03/06: Sort methods in AB order.
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-collections.git
commit 38c36b568551c18f417161951145d6d53e2717ab
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sun Feb 16 15:15:26 2020 -0500
Sort methods in AB order.
---
.../bloomfilter/AbstractBloomFilter.java | 228 +++++------
.../bloomfilter/BitSetBloomFilter.java | 68 ++--
.../collections4/bloomfilter/BloomFilter.java | 74 ++--
.../bloomfilter/CountingBloomFilter.java | 102 ++---
.../bloomfilter/HasherBloomFilter.java | 46 +--
.../collections4/bloomfilter/SetOperations.java | 130 +++---
.../bloomfilter/hasher/DynamicHasher.java | 174 ++++----
.../bloomfilter/hasher/HashFunctionIdentity.java | 66 +--
.../hasher/HashFunctionIdentityImpl.java | 16 +-
.../collections4/bloomfilter/hasher/Hasher.java | 50 +--
.../collections4/bloomfilter/hasher/Shape.java | 216 +++++-----
.../bloomfilter/hasher/StaticHasher.java | 76 ++--
.../bloomfilter/hasher/function/MD5Cyclic.java | 26 +-
.../hasher/function/Murmur128x86Cyclic.java | 26 +-
.../hasher/function/Murmur32x86Iterative.java | 24 +-
.../hasher/function/ObjectsHashIterative.java | 16 +-
.../bloomfilter/AbstractBloomFilterTest.java | 444 ++++++++++-----------
.../bloomfilter/BitSetBloomFilterTest.java | 64 +--
.../bloomfilter/CountingBloomFilterTest.java | 206 +++++-----
.../bloomfilter/DefaultBloomFilterMethodsTest.java | 20 +-
.../bloomfilter/HasherBloomFilterTest.java | 20 +-
.../bloomfilter/SetOperationsTest.java | 282 ++++++-------
.../bloomfilter/hasher/CommonComparatorTest.java | 42 +-
.../bloomfilter/hasher/DeepComparatorTest.java | 42 +-
.../hasher/DynamicHasherBuilderTest.java | 36 +-
.../bloomfilter/hasher/DynamicHasherTest.java | 16 +-
.../hasher/HashFunctionIdentityImplTest.java | 16 +-
.../collections4/bloomfilter/hasher/ShapeTest.java | 412 +++++++++----------
.../bloomfilter/hasher/StaticHasherTest.java | 192 ++++-----
29 files changed, 1565 insertions(+), 1565 deletions(-)
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
index 5eca78d..64a60bd 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilter.java
@@ -54,67 +54,101 @@ public abstract class AbstractBloomFilter implements BloomFilter {
private final Shape shape;
/**
- * Gets an array of little-endian long values representing the on bits of this filter.
- * bits 0-63 are in the first long.
+ * Construct a Bloom filter with the specified shape.
*
- * @return the LongBuffer representation of this filter.
+ * @param shape The shape.
*/
- @Override
- public abstract long[] getBits();
+ protected AbstractBloomFilter(final Shape shape) {
+ this.shape = shape;
+ }
/**
- * Creates a StaticHasher that contains the indexes of the bits that are on in this
- * filter.
+ * Performs a logical "AND" with the other Bloom filter and returns the cardinality of
+ * the result.
*
- * @return a StaticHasher for that produces this Bloom filter.
+ * @param other the other Bloom filter.
+ * @return the cardinality of the result of {@code ( this AND other )}.
*/
@Override
- public abstract StaticHasher getHasher();
+ public int andCardinality(final BloomFilter other) {
+ verifyShape(other);
+ final long[] mine = getBits();
+ final long[] theirs = other.getBits();
+ final int limit = Integer.min(mine.length, theirs.length);
+ final long[] result = new long[limit];
+ for (int i = 0; i < limit; i++) {
+ result[i] = mine[i] & theirs[i];
+ }
+ return BitSet.valueOf(result).cardinality();
+ }
/**
- * Construct a Bloom filter with the specified shape.
+ * Gets the cardinality of this Bloom filter.
*
- * @param shape The shape.
+ * @return the cardinality (number of enabled bits) in this filter.
*/
- protected AbstractBloomFilter(final Shape shape) {
- this.shape = shape;
+ @Override
+ public int cardinality() {
+ return BitSet.valueOf(getBits()).cardinality();
}
/**
- * Verify the other Bloom filter has the same shape as this Bloom filter.
+ * Performs a contains check. Effectively this AND other == other.
*
- * @param other the other filter to check.
- * @throws IllegalArgumentException if the shapes are not the same.
+ * @param other the Other Bloom filter.
+ * @return true if this filter matches the other.
*/
- protected void verifyShape(final BloomFilter other) {
- verifyShape(other.getShape());
+ @Override
+ public boolean contains(final BloomFilter other) {
+ verifyShape(other);
+ return other.cardinality() == andCardinality(other);
}
/**
- * Verify the specified shape has the same shape as this Bloom filter.
+ * Performs a contains check against a decomposed Bloom filter. The shape must match
+ * the shape of this filter. The hasher provides bit indexes to check for. Effectively
+ * decomposed AND this == decomposed.
*
- * @param shape the other shape to check.
- * @throws IllegalArgumentException if the shapes are not the same.
+ * @param hasher The hasher containing the bits to check.
+ * @return true if this filter contains the other.
+ * @throws IllegalArgumentException if the shape argument does not match the shape of
+ * this filter, or if the hasher is not the specified one
*/
- protected void verifyShape(final Shape shape) {
- if (!this.shape.equals(shape)) {
- throw new IllegalArgumentException(String.format("Shape %s is not the same as %s", shape, this.shape));
+ @Override
+ public boolean contains(final Hasher hasher) {
+ verifyHasher( hasher );
+ final long[] buff = getBits();
+
+ final OfInt iter = hasher.getBits(shape);
+ while (iter.hasNext()) {
+ final int idx = iter.nextInt();
+ final int buffIdx = idx / Long.SIZE;
+ final int pwr = Math.floorMod(idx, Long.SIZE);
+ final long buffOffset = 1L << pwr;
+ if ((buff[buffIdx] & buffOffset) == 0) {
+ return false;
+ }
}
+ return true;
}
/**
- * Verifies that the hasher has the same name as the shape.
+ * Gets an array of little-endian long values representing the on bits of this filter.
+ * bits 0-63 are in the first long.
*
- * @param hasher the Hasher to check
+ * @return the LongBuffer representation of this filter.
*/
- protected void verifyHasher(final Hasher hasher) {
- if (shape.getHashFunctionIdentity().getSignature() != hasher.getHashFunctionIdentity().getSignature()) {
- throw new IllegalArgumentException(
- String.format("Hasher (%s) is not the hasher for shape (%s)",
- HashFunctionIdentity.asCommonString(hasher.getHashFunctionIdentity()),
- shape.toString()));
- }
- }
+ @Override
+ public abstract long[] getBits();
+
+ /**
+ * Creates a StaticHasher that contains the indexes of the bits that are on in this
+ * filter.
+ *
+ * @return a StaticHasher for that produces this Bloom filter.
+ */
+ @Override
+ public abstract StaticHasher getHasher();
/**
* Gets the shape of this filter.
@@ -127,6 +161,16 @@ public abstract class AbstractBloomFilter implements BloomFilter {
}
/**
+ * Determines if the bloom filter is "full". Full is defined as having no unset
+ * bits.
+ *
+ * @return true if the filter is full.
+ */
+ public final boolean isFull() {
+ return cardinality() == getShape().getNumberOfBits();
+ }
+
+ /**
* Merge the other Bloom filter into this one.
*
* @param other the other Bloom filter.
@@ -145,36 +189,6 @@ public abstract class AbstractBloomFilter implements BloomFilter {
@Override
abstract public void merge(Hasher hasher);
- /**
- * Gets the cardinality of this Bloom filter.
- *
- * @return the cardinality (number of enabled bits) in this filter.
- */
- @Override
- public int cardinality() {
- return BitSet.valueOf(getBits()).cardinality();
- }
-
- /**
- * Performs a logical "AND" with the other Bloom filter and returns the cardinality of
- * the result.
- *
- * @param other the other Bloom filter.
- * @return the cardinality of the result of {@code ( this AND other )}.
- */
- @Override
- public int andCardinality(final BloomFilter other) {
- verifyShape(other);
- final long[] mine = getBits();
- final long[] theirs = other.getBits();
- final int limit = Integer.min(mine.length, theirs.length);
- final long[] result = new long[limit];
- for (int i = 0; i < limit; i++) {
- result[i] = mine[i] & theirs[i];
- }
- return BitSet.valueOf(result).cardinality();
- }
-
@Override
public int orCardinality(final BloomFilter other) {
verifyShape(other);
@@ -202,6 +216,42 @@ public abstract class AbstractBloomFilter implements BloomFilter {
}
/**
+ * Verifies that the hasher has the same name as the shape.
+ *
+ * @param hasher the Hasher to check
+ */
+ protected void verifyHasher(final Hasher hasher) {
+ if (shape.getHashFunctionIdentity().getSignature() != hasher.getHashFunctionIdentity().getSignature()) {
+ throw new IllegalArgumentException(
+ String.format("Hasher (%s) is not the hasher for shape (%s)",
+ HashFunctionIdentity.asCommonString(hasher.getHashFunctionIdentity()),
+ shape.toString()));
+ }
+ }
+
+ /**
+ * Verify the other Bloom filter has the same shape as this Bloom filter.
+ *
+ * @param other the other filter to check.
+ * @throws IllegalArgumentException if the shapes are not the same.
+ */
+ protected void verifyShape(final BloomFilter other) {
+ verifyShape(other.getShape());
+ }
+
+ /**
+ * Verify the specified shape has the same shape as this Bloom filter.
+ *
+ * @param shape the other shape to check.
+ * @throws IllegalArgumentException if the shapes are not the same.
+ */
+ protected void verifyShape(final Shape shape) {
+ if (!this.shape.equals(shape)) {
+ throw new IllegalArgumentException(String.format("Shape %s is not the same as %s", shape, this.shape));
+ }
+ }
+
+ /**
* Performs a logical "XOR" with the other Bloom filter and returns the cardinality of
* the result.
*
@@ -234,54 +284,4 @@ public abstract class AbstractBloomFilter implements BloomFilter {
return BitSet.valueOf(result).cardinality();
}
- /**
- * Performs a contains check. Effectively this AND other == other.
- *
- * @param other the Other Bloom filter.
- * @return true if this filter matches the other.
- */
- @Override
- public boolean contains(final BloomFilter other) {
- verifyShape(other);
- return other.cardinality() == andCardinality(other);
- }
-
- /**
- * Performs a contains check against a decomposed Bloom filter. The shape must match
- * the shape of this filter. The hasher provides bit indexes to check for. Effectively
- * decomposed AND this == decomposed.
- *
- * @param hasher The hasher containing the bits to check.
- * @return true if this filter contains the other.
- * @throws IllegalArgumentException if the shape argument does not match the shape of
- * this filter, or if the hasher is not the specified one
- */
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher( hasher );
- final long[] buff = getBits();
-
- final OfInt iter = hasher.getBits(shape);
- while (iter.hasNext()) {
- final int idx = iter.nextInt();
- final int buffIdx = idx / Long.SIZE;
- final int pwr = Math.floorMod(idx, Long.SIZE);
- final long buffOffset = 1L << pwr;
- if ((buff[buffIdx] & buffOffset) == 0) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * Determines if the bloom filter is "full". Full is defined as having no unset
- * bits.
- *
- * @return true if the filter is full.
- */
- public final boolean isFull() {
- return cardinality() == getShape().getNumberOfBits();
- }
-
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
index a85f11e..63ed587 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
@@ -59,24 +59,28 @@ public class BitSetBloomFilter extends AbstractBloomFilter {
this.bitSet = new BitSet();
}
+ /**
+ * Calculates the andCardinality with another BitSetBloomFilter. <p> This method takes
+ * advantage of internal structures of BitSetBloomFilter. </p>
+ *
+ * @param other the other BitSetBloomFilter.
+ * @return the cardinality of the result of {@code ( this AND other )}.
+ * @see #andCardinality(BloomFilter)
+ */
@Override
- public long[] getBits() {
- return bitSet.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bitSet.stream().iterator(), getShape());
+ public int andCardinality(final BloomFilter other) {
+ if (other instanceof BitSetBloomFilter) {
+ verifyShape(other);
+ final BitSet result = (BitSet) bitSet.clone();
+ result.and(((BitSetBloomFilter)other).bitSet);
+ return result.cardinality();
+ }
+ return super.andCardinality(other);
}
@Override
- public void merge(final BloomFilter other) {
- verifyShape(other);
- if (other instanceof BitSetBloomFilter) {
- bitSet.or(((BitSetBloomFilter)other).bitSet);
- } else {
- bitSet.or(BitSet.valueOf(other.getBits()));
- }
+ public int cardinality() {
+ return bitSet.cardinality();
}
@Override
@@ -92,13 +96,23 @@ public class BitSetBloomFilter extends AbstractBloomFilter {
}
@Override
- public int cardinality() {
- return bitSet.cardinality();
+ public long[] getBits() {
+ return bitSet.toLongArray();
}
@Override
- public String toString() {
- return bitSet.toString();
+ public StaticHasher getHasher() {
+ return new StaticHasher(bitSet.stream().iterator(), getShape());
+ }
+
+ @Override
+ public void merge(final BloomFilter other) {
+ verifyShape(other);
+ if (other instanceof BitSetBloomFilter) {
+ bitSet.or(((BitSetBloomFilter)other).bitSet);
+ } else {
+ bitSet.or(BitSet.valueOf(other.getBits()));
+ }
}
@@ -108,23 +122,9 @@ public class BitSetBloomFilter extends AbstractBloomFilter {
hasher.getBits(getShape()).forEachRemaining((IntConsumer) bitSet::set);
}
- /**
- * Calculates the andCardinality with another BitSetBloomFilter. <p> This method takes
- * advantage of internal structures of BitSetBloomFilter. </p>
- *
- * @param other the other BitSetBloomFilter.
- * @return the cardinality of the result of {@code ( this AND other )}.
- * @see #andCardinality(BloomFilter)
- */
@Override
- public int andCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.and(((BitSetBloomFilter)other).bitSet);
- return result.cardinality();
- }
- return super.andCardinality(other);
+ public String toString() {
+ return bitSet.toString();
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
index 717771d..d3005f2 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
@@ -28,6 +28,43 @@ import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
public interface BloomFilter {
/**
+ * Performs a logical "AND" with the other Bloom filter and returns the cardinality of
+ * the result.
+ *
+ * @param other the other Bloom filter.
+ * @return the cardinality of the result of {@code ( this AND other )}.
+ */
+ int andCardinality(BloomFilter other);
+
+ /**
+ * Gets the cardinality of this Bloom filter.
+ * <p>This is also known as the Hamming value.</p>
+ *
+ * @return the cardinality (number of enabled bits) in this filter.
+ */
+ int cardinality();
+
+ /**
+ * Performs a contains check. Effectively this AND other == other.
+ *
+ * @param other the Other Bloom filter.
+ * @return true if this filter matches the other.
+ */
+ boolean contains(BloomFilter other);
+
+ /**
+ * Performs a contains check against a decomposed Bloom filter. The shape must match
+ * the shape of this filter. The hasher provides bit indexes to check for. Effectively
+ * decomposed AND this == decomposed.
+ *
+ * @param hasher The hasher containing the bits to check.
+ * @return true if this filter contains the other.
+ * @throws IllegalArgumentException if the shape argument does not match the shape of
+ * this filter, or if the hasher is not the specified one
+ */
+ boolean contains(Hasher hasher);
+
+ /**
* Gets an array of little-endian long values representing the on bits of this filter.
* bits 0-63 are in the first long.
*
@@ -68,23 +105,6 @@ public interface BloomFilter {
void merge(Hasher hasher);
/**
- * Gets the cardinality of this Bloom filter.
- * <p>This is also known as the Hamming value.</p>
- *
- * @return the cardinality (number of enabled bits) in this filter.
- */
- int cardinality();
-
- /**
- * Performs a logical "AND" with the other Bloom filter and returns the cardinality of
- * the result.
- *
- * @param other the other Bloom filter.
- * @return the cardinality of the result of {@code ( this AND other )}.
- */
- int andCardinality(BloomFilter other);
-
- /**
* Performs a logical "OR" with the other Bloom filter and returns the cardinality of
* the result.
*
@@ -102,26 +122,6 @@ public interface BloomFilter {
*/
int xorCardinality(BloomFilter other);
- /**
- * Performs a contains check. Effectively this AND other == other.
- *
- * @param other the Other Bloom filter.
- * @return true if this filter matches the other.
- */
- boolean contains(BloomFilter other);
-
- /**
- * Performs a contains check against a decomposed Bloom filter. The shape must match
- * the shape of this filter. The hasher provides bit indexes to check for. Effectively
- * decomposed AND this == decomposed.
- *
- * @param hasher The hasher containing the bits to check.
- * @return true if this filter contains the other.
- * @throws IllegalArgumentException if the shape argument does not match the shape of
- * this filter, or if the hasher is not the specified one
- */
- boolean contains(Hasher hasher);
-
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index 644b6a8..30d9be4 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -68,16 +68,6 @@ public class CountingBloomFilter extends AbstractBloomFilter {
}
/**
- * Constructs an empty Counting filter with the specified shape.
- *
- * @param shape The shape of the resulting filter.
- */
- public CountingBloomFilter(final Shape shape) {
- super(shape);
- this.counts = new TreeMap<>();
- }
-
- /**
* Constructs a counting Bloom filter with the provided counts and shape
*
* @param counts A map of data counts.
@@ -104,6 +94,50 @@ public class CountingBloomFilter extends AbstractBloomFilter {
}
/**
+ * Constructs an empty Counting filter with the specified shape.
+ *
+ * @param shape The shape of the resulting filter.
+ */
+ public CountingBloomFilter(final Shape shape) {
+ super(shape);
+ this.counts = new TreeMap<>();
+ }
+
+ @Override
+ public int andCardinality(final BloomFilter other) {
+ if (other instanceof CountingBloomFilter) {
+ final Set<Integer> result = new HashSet<>( counts.keySet());
+ result.retainAll( ((CountingBloomFilter)other).counts.keySet() );
+ return result.size();
+ }
+ return super.andCardinality(other);
+ }
+
+ @Override
+ public int cardinality() {
+ return counts.size();
+ }
+
+ @Override
+ public boolean contains(final Hasher hasher) {
+ verifyHasher(hasher);
+ final OfInt iter = hasher.getBits(getShape());
+ while (iter.hasNext()) {
+ if (counts.get(iter.nextInt()) == null) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public long[] getBits() {
+ final BitSet bs = new BitSet();
+ counts.keySet().stream().forEach(bs::set);
+ return bs.toLongArray();
+ }
+
+ /**
* Gets the count for each enabled bit.
*
* @return an immutable map of enabled bits (key) to counts for that bit
@@ -115,12 +149,8 @@ public class CountingBloomFilter extends AbstractBloomFilter {
}
@Override
- public String toString() {
- final StringBuilder sb = new StringBuilder("{ ");
- for (final Map.Entry<Integer, Integer> e : counts.entrySet()) {
- sb.append(String.format("(%s,%s) ", e.getKey(), e.getValue()));
- }
- return sb.append("}").toString();
+ public StaticHasher getHasher() {
+ return new StaticHasher(counts.keySet().iterator(), getShape());
}
/**
@@ -231,41 +261,11 @@ public class CountingBloomFilter extends AbstractBloomFilter {
}
@Override
- public long[] getBits() {
- final BitSet bs = new BitSet();
- counts.keySet().stream().forEach(bs::set);
- return bs.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(counts.keySet().iterator(), getShape());
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final OfInt iter = hasher.getBits(getShape());
- while (iter.hasNext()) {
- if (counts.get(iter.nextInt()) == null) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public int cardinality() {
- return counts.size();
- }
-
- @Override
- public int andCardinality(final BloomFilter other) {
- if (other instanceof CountingBloomFilter) {
- final Set<Integer> result = new HashSet<>( counts.keySet());
- result.retainAll( ((CountingBloomFilter)other).counts.keySet() );
- return result.size();
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("{ ");
+ for (final Map.Entry<Integer, Integer> e : counts.entrySet()) {
+ sb.append(String.format("(%s,%s) ", e.getKey(), e.getValue()));
}
- return super.andCardinality(other);
+ return sb.append("}").toString();
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
index e78b13e..e781179 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
@@ -71,6 +71,29 @@ public class HasherBloomFilter extends AbstractBloomFilter {
}
@Override
+ public int cardinality() {
+ return hasher.size();
+ }
+
+ @Override
+ public boolean contains(final Hasher hasher) {
+ verifyHasher(hasher);
+ final Set<Integer> set = new TreeSet<>();
+ hasher.getBits(getShape()).forEachRemaining((IntConsumer) idx -> {
+ set.add(idx);
+ });
+ final OfInt iter = this.hasher.getBits(getShape());
+ while (iter.hasNext()) {
+ final int idx = iter.nextInt();
+ set.remove(idx);
+ if (set.isEmpty()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
public long[] getBits() {
if (hasher.size() == 0) {
return new long[0];
@@ -117,27 +140,4 @@ public class HasherBloomFilter extends AbstractBloomFilter {
this.hasher = new StaticHasher(iter, getShape());
}
- @Override
- public int cardinality() {
- return hasher.size();
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final Set<Integer> set = new TreeSet<>();
- hasher.getBits(getShape()).forEachRemaining((IntConsumer) idx -> {
- set.add(idx);
- });
- final OfInt iter = this.hasher.getBits(getShape());
- while (iter.hasNext()) {
- final int idx = iter.nextInt();
- set.remove(idx);
- if (set.isEmpty()) {
- return true;
- }
- }
- return false;
- }
-
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
index d823a93..2d0358b 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
@@ -26,64 +26,16 @@ import org.apache.commons.collections4.bloomfilter.hasher.Shape;
public final class SetOperations {
/**
- * Do not instantiate.
- */
- private SetOperations() {}
-
- /**
- * Verifies the Bloom filters have the same shape.
- *
- * @param first the first filter to check.
- * @param second the second filter to check.
- * @throws IllegalArgumentException if the shapes are not the same.
- */
- private static void verifyShape(final BloomFilter first, final BloomFilter second) {
- if (!first.getShape().equals(second.getShape())) {
- throw new IllegalArgumentException(String.format("Shape %s is not the same as %s",
- first.getShape(), second.getShape()));
- }
- }
-
- /**
- * Calculates the Hamming distance between two Bloom filters.
- *
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
- * @return the Hamming distance.
- */
- public static int hammingDistance(final BloomFilter first, final BloomFilter second) {
- verifyShape(first,second);
- return first.xorCardinality(second);
- }
-
-
- /**
- * Calculates the Jaccard similarity between two Bloom filters.
- *
- * <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient</p>
- *
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
- * @return the Jaccard similarity.
- */
- public static double jaccardSimilarity(final BloomFilter first, final BloomFilter second) {
- verifyShape(first,second);
- final int orCard = first.orCardinality(second);
- // if the orCard is zero then the hamming distance will also be zero.
- return orCard==0?0:hammingDistance(first,second) / (double) orCard;
- }
-
- /**
- * Calculates the Jaccard distance between two Bloom filters.
+ * Calculates the Cosine distance between two Bloom filters.
*
- * <p>Jaccard distance is defined as {@code 1 - Jaccard similarity}</p>
+ * <p>Cosine distance is defined as {@code 1 - Cosine similarity}</p>
*
* @param first the first Bloom filter.
* @param second the second Bloom filter.
- * @return the Jaccard distance.
+ * @return the jaccard distance.
*/
- public static double jaccardDistance(final BloomFilter first, final BloomFilter second) {
- return 1.0 - jaccardSimilarity(first,second);
+ public static double cosineDistance(final BloomFilter first, final BloomFilter second) {
+ return 1.0 - cosineSimilarity(first,second);
}
/**
@@ -105,18 +57,20 @@ public final class SetOperations {
}
/**
- * Calculates the Cosine distance between two Bloom filters.
- *
- * <p>Cosine distance is defined as {@code 1 - Cosine similarity}</p>
+ * Estimates the number of items in the intersection of the sets represented by two
+ * Bloom filters.
*
* @param first the first Bloom filter.
* @param second the second Bloom filter.
- * @return the jaccard distance.
+ * @return an estimate of the size of the intersection between the two filters.
*/
- public static double cosineDistance(final BloomFilter first, final BloomFilter second) {
- return 1.0 - cosineSimilarity(first,second);
+ public static long estimateIntersectionSize(final BloomFilter first, final BloomFilter second) {
+ verifyShape(first,second);
+ // do subtraction early to avoid Long overflow.
+ return estimateSize(first) - estimateUnionSize(first,second) + estimateSize(second);
}
+
/**
* Estimates the number of items in the Bloom filter based on the shape and the number
* of bits that are enabled.
@@ -150,16 +104,62 @@ public final class SetOperations {
}
/**
- * Estimates the number of items in the intersection of the sets represented by two
- * Bloom filters.
+ * Calculates the Hamming distance between two Bloom filters.
*
* @param first the first Bloom filter.
* @param second the second Bloom filter.
- * @return an estimate of the size of the intersection between the two filters.
+ * @return the Hamming distance.
*/
- public static long estimateIntersectionSize(final BloomFilter first, final BloomFilter second) {
+ public static int hammingDistance(final BloomFilter first, final BloomFilter second) {
verifyShape(first,second);
- // do subtraction early to avoid Long overflow.
- return estimateSize(first) - estimateUnionSize(first,second) + estimateSize(second);
+ return first.xorCardinality(second);
}
+
+ /**
+ * Calculates the Jaccard distance between two Bloom filters.
+ *
+ * <p>Jaccard distance is defined as {@code 1 - Jaccard similarity}</p>
+ *
+ * @param first the first Bloom filter.
+ * @param second the second Bloom filter.
+ * @return the Jaccard distance.
+ */
+ public static double jaccardDistance(final BloomFilter first, final BloomFilter second) {
+ return 1.0 - jaccardSimilarity(first,second);
+ }
+
+ /**
+ * Calculates the Jaccard similarity between two Bloom filters.
+ *
+ * <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient</p>
+ *
+ * @param first the first Bloom filter.
+ * @param second the second Bloom filter.
+ * @return the Jaccard similarity.
+ */
+ public static double jaccardSimilarity(final BloomFilter first, final BloomFilter second) {
+ verifyShape(first,second);
+ final int orCard = first.orCardinality(second);
+ // if the orCard is zero then the hamming distance will also be zero.
+ return orCard==0?0:hammingDistance(first,second) / (double) orCard;
+ }
+
+ /**
+ * Verifies the Bloom filters have the same shape.
+ *
+ * @param first the first filter to check.
+ * @param second the second filter to check.
+ * @throws IllegalArgumentException if the shapes are not the same.
+ */
+ private static void verifyShape(final BloomFilter first, final BloomFilter second) {
+ if (!first.getShape().equals(second.getShape())) {
+ throw new IllegalArgumentException(String.format("Shape %s is not the same as %s",
+ first.getShape(), second.getShape()));
+ }
+ }
+
+ /**
+ * Do not instantiate.
+ */
+ private SetOperations() {}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
index a4f943c..6e54cf4 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
@@ -30,56 +30,57 @@ import java.util.PrimitiveIterator;
public class DynamicHasher implements Hasher {
/**
- * The list of byte arrays that are to be hashed.
+ * The builder for DynamicHashers.
+ * @since 4.5
*/
- private final List<byte[]> buffers;
+ public static class Builder implements Hasher.Builder {
+ /**
+ * The list of byte[] that are to be hashed.
+ */
+ private final List<byte[]> buffers;
- /**
- * The function to hash the buffers.
- */
- private final HashFunction function;
+ /**
+ * The function that the resulting DynamicHasher will use.
+ */
+ private final HashFunction function;
- /**
- * Constructs a DynamicHasher.
- *
- * @param function the function to use.
- * @param buffers the byte buffers that will be hashed.
- */
- public DynamicHasher(final HashFunction function, final List<byte[]> buffers) {
- this.buffers = new ArrayList<>(buffers);
- this.function = function;
- }
+ /**
+ * Constructs a DynamicHasher builder.
+ *
+ * @param function the function implementation.
+ */
+ public Builder(final HashFunction function) {
+ this.function = function;
+ this.buffers = new ArrayList<>();
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return function;
- }
+ }
- @Override
- public boolean isEmpty() {
- return buffers.isEmpty();
- }
+ /**
+ * Builds the hasher.
+ *
+ * @return A DynamicHasher with the specified name, function and buffers.
+ */
+ @Override
+ public DynamicHasher build() throws IllegalArgumentException {
+ return new DynamicHasher(function, buffers);
+ }
- /**
- * Return an iterator of integers that are the bits to enable in the Bloom filter
- * based on the shape. The iterator may return the same value multiple times. There is
- * no guarantee made as to the order of the integers.
- *
- * @param shape the shape of the desired Bloom filter.
- * @return the Iterator of integers;
- * @throws IllegalArgumentException if {@code shape.getHasherName()} does not equal
- * {@code getName()}
- */
- @Override
- public PrimitiveIterator.OfInt getBits(final Shape shape) {
- if (HashFunctionIdentity.COMMON_COMPARATOR.compare(getHashFunctionIdentity(),
- shape.getHashFunctionIdentity()) != 0) {
- throw new IllegalArgumentException(
- String.format("Shape hasher %s is not %s",
- HashFunctionIdentity.asCommonString(shape.getHashFunctionIdentity()),
- HashFunctionIdentity.asCommonString(getHashFunctionIdentity())));
+ @Override
+ public final Builder with(final byte property) {
+ return with(new byte[] {property});
}
- return new Iterator(shape);
+
+ @Override
+ public final Builder with(final byte[] property) {
+ buffers.add(property);
+ return this;
+ }
+
+ @Override
+ public final Builder with(final String property) {
+ return with(property.getBytes(StandardCharsets.UTF_8));
+ }
+
}
/**
@@ -122,57 +123,56 @@ public class DynamicHasher implements Hasher {
}
/**
- * The builder for DynamicHashers.
- * @since 4.5
+ * The list of byte arrays that are to be hashed.
*/
- public static class Builder implements Hasher.Builder {
- /**
- * The list of byte[] that are to be hashed.
- */
- private final List<byte[]> buffers;
-
- /**
- * The function that the resulting DynamicHasher will use.
- */
- private final HashFunction function;
-
- /**
- * Constructs a DynamicHasher builder.
- *
- * @param function the function implementation.
- */
- public Builder(final HashFunction function) {
- this.function = function;
- this.buffers = new ArrayList<>();
-
- }
+ private final List<byte[]> buffers;
- /**
- * Builds the hasher.
- *
- * @return A DynamicHasher with the specified name, function and buffers.
- */
- @Override
- public DynamicHasher build() throws IllegalArgumentException {
- return new DynamicHasher(function, buffers);
- }
+ /**
+ * The function to hash the buffers.
+ */
+ private final HashFunction function;
- @Override
- public final Builder with(final byte property) {
- return with(new byte[] {property});
- }
+ /**
+ * Constructs a DynamicHasher.
+ *
+ * @param function the function to use.
+ * @param buffers the byte buffers that will be hashed.
+ */
+ public DynamicHasher(final HashFunction function, final List<byte[]> buffers) {
+ this.buffers = new ArrayList<>(buffers);
+ this.function = function;
+ }
- @Override
- public final Builder with(final byte[] property) {
- buffers.add(property);
- return this;
+ /**
+ * Return an iterator of integers that are the bits to enable in the Bloom filter
+ * based on the shape. The iterator may return the same value multiple times. There is
+ * no guarantee made as to the order of the integers.
+ *
+ * @param shape the shape of the desired Bloom filter.
+ * @return the Iterator of integers;
+ * @throws IllegalArgumentException if {@code shape.getHasherName()} does not equal
+ * {@code getName()}
+ */
+ @Override
+ public PrimitiveIterator.OfInt getBits(final Shape shape) {
+ if (HashFunctionIdentity.COMMON_COMPARATOR.compare(getHashFunctionIdentity(),
+ shape.getHashFunctionIdentity()) != 0) {
+ throw new IllegalArgumentException(
+ String.format("Shape hasher %s is not %s",
+ HashFunctionIdentity.asCommonString(shape.getHashFunctionIdentity()),
+ HashFunctionIdentity.asCommonString(getHashFunctionIdentity())));
}
+ return new Iterator(shape);
+ }
- @Override
- public final Builder with(final String property) {
- return with(property.getBytes(StandardCharsets.UTF_8));
- }
+ @Override
+ public HashFunctionIdentity getHashFunctionIdentity() {
+ return function;
+ }
+ @Override
+ public boolean isEmpty() {
+ return buffers.isEmpty();
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
index 84e5d1a..bf5ea95 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
@@ -29,6 +29,25 @@ import java.util.Locale;
public interface HashFunctionIdentity {
/**
+ * An enum that identifies the process type of this function. <dl> <dt>Iterative
+ * processes</dt> <dd>Call the underlying algorithm for each buffer, seed pair call to
+ * {@code apply}.</dd> <dt>Cyclic processes</dt> <dd>Call the underlying algorithm to
+ * generate two values for each buffer. It returns the first value on the call with
+ * seed 0, and increments the result with the second value before returning it on all
+ * subsequent calls.</dd> </dl>
+ */
+ enum ProcessType {
+ CYCLIC, ITERATIVE
+ }
+
+ /**
+ * An enum that identifies the Signedness of the calculations for this function.
+ */
+ enum Signedness {
+ SIGNED, UNSIGNED
+ }
+
+ /**
* A comparator implementation that performs the most common comparison using the
* HashFunctionIdentity name, signedness, and process.
*/
@@ -94,25 +113,6 @@ public interface HashFunctionIdentity {
}
/**
- * An enum that identifies the Signedness of the calculations for this function.
- */
- enum Signedness {
- SIGNED, UNSIGNED
- }
-
- /**
- * An enum that identifies the process type of this function. <dl> <dt>Iterative
- * processes</dt> <dd>Call the underlying algorithm for each buffer, seed pair call to
- * {@code apply}.</dd> <dt>Cyclic processes</dt> <dd>Call the underlying algorithm to
- * generate two values for each buffer. It returns the first value on the call with
- * seed 0, and increments the result with the second value before returning it on all
- * subsequent calls.</dd> </dl>
- */
- enum ProcessType {
- CYCLIC, ITERATIVE
- }
-
- /**
* Gets the name of this hash function.
* <p> Hash function should be the common name
* for the hash. This may include indications as to hash length
@@ -124,6 +124,13 @@ public interface HashFunctionIdentity {
String getName();
/**
+ * Gets the process of this function.
+ *
+ * @return process of this function.
+ */
+ ProcessType getProcessType();
+
+ /**
* Gets the name of the provider of this hash function implementation.
* <p>
* Provider names are not case specific. Thus, "Apache Commons Collection" and
@@ -134,20 +141,6 @@ public interface HashFunctionIdentity {
String getProvider();
/**
- * Gets the signedness of this function.
- *
- * @return signedness of this function.
- */
- Signedness getSignedness();
-
- /**
- * Gets the process of this function.
- *
- * @return process of this function.
- */
- ProcessType getProcessType();
-
- /**
* Get the signature of this function. <p> The signature of this function is
* calculated as: {@code
* apply( String.format( "%s-%s-%s", getName(), getSignedness(), getProcess() )
@@ -158,4 +151,11 @@ public interface HashFunctionIdentity {
*/
long getSignature();
+ /**
+ * Gets the signedness of this function.
+ *
+ * @return signedness of this function.
+ */
+ Signedness getSignedness();
+
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
index 17e9ab2..b1bb202 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
@@ -65,23 +65,23 @@ public final class HashFunctionIdentityImpl implements HashFunctionIdentity {
}
@Override
- public String getProvider() {
- return provider;
+ public ProcessType getProcessType() {
+ return process;
}
@Override
- public Signedness getSignedness() {
- return signedness;
+ public String getProvider() {
+ return provider;
}
@Override
- public ProcessType getProcessType() {
- return process;
+ public long getSignature() {
+ return signature;
}
@Override
- public long getSignature() {
- return signature;
+ public Signedness getSignedness() {
+ return signedness;
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
index 38e2e46..2608ca6 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
@@ -34,31 +34,6 @@ import java.util.PrimitiveIterator;
public interface Hasher {
/**
- * Gets HashFunctionIdentity of the hash function this Hasher uses.
- *
- * @return HashFunctionIdentity of the hash function this Hasher uses.
- */
- HashFunctionIdentity getHashFunctionIdentity();
-
- /**
- * Returns true if the hasher specifies no bits.
- * @return true if the hasher does not specify any bits.
- */
- boolean isEmpty();
-
- /**
- * Return an iterator of integers that are the bits to enable in the Bloom
- * filter based on the shape. No guarantee is made as to order
- * or duplication of values.
- *
- * @param shape the shape of the desired Bloom filter.
- * @return the Iterator of integers;
- * @throws IllegalArgumentException if {@code shape.getHasherName()} does not
- * equal {@code getName()}
- */
- PrimitiveIterator.OfInt getBits(Shape shape);
-
- /**
* A builder to build a hasher.
* @since 4.5
*/
@@ -101,4 +76,29 @@ public interface Hasher {
Builder with(String property);
}
+
+ /**
+ * Return an iterator of integers that are the bits to enable in the Bloom
+ * filter based on the shape. No guarantee is made as to order
+ * or duplication of values.
+ *
+ * @param shape the shape of the desired Bloom filter.
+ * @return the Iterator of integers;
+ * @throws IllegalArgumentException if {@code shape.getHasherName()} does not
+ * equal {@code getName()}
+ */
+ PrimitiveIterator.OfInt getBits(Shape shape);
+
+ /**
+ * Gets HashFunctionIdentity of the hash function this Hasher uses.
+ *
+ * @return HashFunctionIdentity of the hash function this Hasher uses.
+ */
+ HashFunctionIdentity getHashFunctionIdentity();
+
+ /**
+ * Returns true if the hasher specifies no bits.
+ * @return true if the hasher does not specify any bits.
+ */
+ boolean isEmpty();
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
index a8eb323..712c185 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
@@ -83,6 +83,57 @@ public class Shape {
/**
* Create a filter configuration with the specified number of items and
+ * probability.
+ *
+ * @param hashFunctionIdentity The HashFunctionIdentity of the hash function this shape uses.
+ * @param probability The probability of duplicates. Must be in the range
+ * (0.0,1.0).
+ * @param numberOfBits The number of bits in the filter.
+ * @param numberOfHashFunctions The number of hash functions in the filter.
+ */
+ public Shape(final HashFunctionIdentity hashFunctionIdentity, final double probability, final int numberOfBits,
+ final int numberOfHashFunctions) {
+ if (hashFunctionIdentity == null) {
+ throw new IllegalArgumentException("Hash function name may not be null");
+ }
+ if (probability <= 0.0) {
+ throw new IllegalArgumentException("Probability must be greater than 0.0");
+ }
+ if (probability >= 1.0) {
+ throw new IllegalArgumentException("Probability must be less than 1.0");
+ }
+ if (numberOfBits < 8) {
+ throw new IllegalArgumentException("Number of bits must be greater than or equal to 8");
+ }
+ if (numberOfHashFunctions < 1) {
+ throw new IllegalArgumentException("Number of hash functions must be greater than or equal to 8");
+ }
+ this.hashFunctionIdentity = hashFunctionIdentity;
+ this.numberOfBits = numberOfBits;
+ this.numberOfHashFunctions = numberOfHashFunctions;
+
+ // n = ceil(m / (-k / log(1 - exp(log(p) / k))))
+ final double n = Math.ceil(numberOfBits /
+ (-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
+
+ // log of probability is always < 0
+ // number of hash functions is >= 1
+ // e^x where x < 0 = [0,1)
+ // log 1-e^x = [log1, log0) = <0 with an effective lower limit of -53
+ // numberOfBits/ (-numberOfHashFunctions / [-53,0) ) >0
+ // ceil( >0 ) >= 1
+ // so we can not produce a negative value thus we don't check for it.
+ //
+ // similarly we can not produce a number greater than numberOfBits so we
+ // do not have to check for Integer.MAX_VALUE either.
+ this.numberOfItems = (int) n;
+ this.hashCode = generateHashCode();
+ // check that probability is within range
+ getProbability();
+ }
+
+ /**
+ * Create a filter configuration with the specified number of items and
* probability. <p> The actual probability will be approximately equal to the
* desired probability but will be dependent upon the calculated bloom filter size
* and function count. </p>
@@ -185,68 +236,6 @@ public class Shape {
}
/**
- * Create a filter configuration with the specified number of items and
- * probability.
- *
- * @param hashFunctionIdentity The HashFunctionIdentity of the hash function this shape uses.
- * @param probability The probability of duplicates. Must be in the range
- * (0.0,1.0).
- * @param numberOfBits The number of bits in the filter.
- * @param numberOfHashFunctions The number of hash functions in the filter.
- */
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final double probability, final int numberOfBits,
- final int numberOfHashFunctions) {
- if (hashFunctionIdentity == null) {
- throw new IllegalArgumentException("Hash function name may not be null");
- }
- if (probability <= 0.0) {
- throw new IllegalArgumentException("Probability must be greater than 0.0");
- }
- if (probability >= 1.0) {
- throw new IllegalArgumentException("Probability must be less than 1.0");
- }
- if (numberOfBits < 8) {
- throw new IllegalArgumentException("Number of bits must be greater than or equal to 8");
- }
- if (numberOfHashFunctions < 1) {
- throw new IllegalArgumentException("Number of hash functions must be greater than or equal to 8");
- }
- this.hashFunctionIdentity = hashFunctionIdentity;
- this.numberOfBits = numberOfBits;
- this.numberOfHashFunctions = numberOfHashFunctions;
-
- // n = ceil(m / (-k / log(1 - exp(log(p) / k))))
- final double n = Math.ceil(numberOfBits /
- (-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
-
- // log of probability is always < 0
- // number of hash functions is >= 1
- // e^x where x < 0 = [0,1)
- // log 1-e^x = [log1, log0) = <0 with an effective lower limit of -53
- // numberOfBits/ (-numberOfHashFunctions / [-53,0) ) >0
- // ceil( >0 ) >= 1
- // so we can not produce a negative value thus we don't check for it.
- //
- // similarly we can not produce a number greater than numberOfBits so we
- // do not have to check for Integer.MAX_VALUE either.
- this.numberOfItems = (int) n;
- this.hashCode = generateHashCode();
- // check that probability is within range
- getProbability();
- }
-
- private int generateHashCode() {
- return Objects.hash(hashFunctionIdentity, numberOfBits, numberOfHashFunctions);
- }
-
- @Override
- public String toString() {
- return String.format("Shape[ %s n=%s m=%s k=%s ]",
- HashFunctionIdentity.asCommonString(hashFunctionIdentity),
- numberOfItems, numberOfBits, numberOfHashFunctions);
- }
-
- /**
* Calculates the number of hash functions given numberOfItems and numberofBits.
* This is a method so that the calculation is consistent across all constructors.
*
@@ -273,37 +262,29 @@ public class Shape {
return (int) k;
}
- /**
- * Calculates the probability of false positives (AKA: {@code p} given
- * numberOfItems, numberofBits and numberOfHashFunctions. This is a method so that
- * the calculation is consistent across all constructors.
- *
- * @return the probability of collision.
- */
- public final double getProbability() {
- // (1 - exp(-kn/m))^k
- final double p = Math.pow(1.0 - Math.exp(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
- numberOfHashFunctions);
- /*
- * We do not need to check for p < = since we only allow positive values for
- * parameters and the closest we can come to exp(-kn/m) == 1 is
- * exp(-1/Integer.MAX_INT) approx 0.9999999995343387 so Math.pow( x, y ) will
- * always be 0<x<1 and y>0
- */
- if (p >= 1.0) {
- throw new IllegalArgumentException(
- String.format("Calculated probability (%s) is greater than or equal to 1.0", p));
+ @Override
+ public boolean equals(final Object o) {
+ if (o instanceof Shape) {
+ final Shape other = (Shape) o;
+ return
+ other.getNumberOfBits() == getNumberOfBits() &&
+ other.getNumberOfHashFunctions() == getNumberOfHashFunctions() &&
+ HashFunctionIdentity.COMMON_COMPARATOR.compare( getHashFunctionIdentity(),
+ other.getHashFunctionIdentity()) == 0;
}
- return p;
+ return false;
+ }
+
+ private int generateHashCode() {
+ return Objects.hash(hashFunctionIdentity, numberOfBits, numberOfHashFunctions);
}
/**
- * Gets the number of items that are expected in the filter. AKA: {@code n}
- *
- * @return the number of items.
+ * Gets the HashFunctionIdentity of the hash function this shape uses.
+ * @return the HashFunctionIdentity of the hash function this shape uses.
*/
- public int getNumberOfItems() {
- return numberOfItems;
+ public HashFunctionIdentity getHashFunctionIdentity() {
+ return hashFunctionIdentity;
}
/**
@@ -316,6 +297,15 @@ public class Shape {
}
/**
+ * Gets the number of bytes in the Bloom filter.
+ *
+ * @return the number of bytes in the Bloom filter.
+ */
+ public int getNumberOfBytes() {
+ return Double.valueOf(Math.ceil(numberOfBits / (double)Byte.SIZE )).intValue();
+ }
+
+ /**
* Gets the number of hash functions used to construct the filter. AKA: {@code k}
*
* @return the number of hash functions used to construct the filter.
@@ -325,25 +315,36 @@ public class Shape {
}
/**
- * Gets the number of bytes in the Bloom filter.
+ * Gets the number of items that are expected in the filter. AKA: {@code n}
*
- * @return the number of bytes in the Bloom filter.
+ * @return the number of items.
*/
- public int getNumberOfBytes() {
- return Double.valueOf(Math.ceil(numberOfBits / (double)Byte.SIZE )).intValue();
+ public int getNumberOfItems() {
+ return numberOfItems;
}
- @Override
- public boolean equals(final Object o) {
- if (o instanceof Shape) {
- final Shape other = (Shape) o;
- return
- other.getNumberOfBits() == getNumberOfBits() &&
- other.getNumberOfHashFunctions() == getNumberOfHashFunctions() &&
- HashFunctionIdentity.COMMON_COMPARATOR.compare( getHashFunctionIdentity(),
- other.getHashFunctionIdentity()) == 0;
+ /**
+ * Calculates the probability of false positives (AKA: {@code p} given
+ * numberOfItems, numberofBits and numberOfHashFunctions. This is a method so that
+ * the calculation is consistent across all constructors.
+ *
+ * @return the probability of collision.
+ */
+ public final double getProbability() {
+ // (1 - exp(-kn/m))^k
+ final double p = Math.pow(1.0 - Math.exp(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
+ numberOfHashFunctions);
+ /*
+ * We do not need to check for p < = since we only allow positive values for
+ * parameters and the closest we can come to exp(-kn/m) == 1 is
+ * exp(-1/Integer.MAX_INT) approx 0.9999999995343387 so Math.pow( x, y ) will
+ * always be 0<x<1 and y>0
+ */
+ if (p >= 1.0) {
+ throw new IllegalArgumentException(
+ String.format("Calculated probability (%s) is greater than or equal to 1.0", p));
}
- return false;
+ return p;
}
@Override
@@ -351,11 +352,10 @@ public class Shape {
return hashCode;
}
- /**
- * Gets the HashFunctionIdentity of the hash function this shape uses.
- * @return the HashFunctionIdentity of the hash function this shape uses.
- */
- public HashFunctionIdentity getHashFunctionIdentity() {
- return hashFunctionIdentity;
+ @Override
+ public String toString() {
+ return String.format("Shape[ %s n=%s m=%s k=%s ]",
+ HashFunctionIdentity.asCommonString(hashFunctionIdentity),
+ numberOfItems, numberOfBits, numberOfHashFunctions);
}
}
\ No newline at end of file
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
index 50bd3b8..f1e5306 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
@@ -40,21 +40,6 @@ public final class StaticHasher implements Hasher {
private final int[] values;
/**
- * Constructs the StaticHasher from a StaticHasher and a Shape.
- * @param hasher the StaticHasher to read.
- * @param shape the Shape for the resulting values.
- * @throws IllegalArgumentException if the shape of the hasher and the shape parameter are not the same.
- */
- public StaticHasher(final StaticHasher hasher, final Shape shape) {
- if (!hasher.shape.equals(shape)) {
- throw new IllegalArgumentException(String.format("Hasher shape (%s) is not the same as shape (%s)",
- hasher.getShape().toString(), shape.toString()));
- }
- this.shape = shape;
- this.values = hasher.values;
- }
-
- /**
* Constructs the StaticHasher from a Hasher and a Shape.
* @param hasher the Hasher to read.
* @param shape the Shape for the resulting values.
@@ -99,30 +84,18 @@ public final class StaticHasher implements Hasher {
}
/**
- * Gets the shape this static hasher was created with.
- *
- * @return the Shape of this hasher.
- */
- public Shape getShape() {
- return shape;
- }
-
- @Override
- public boolean isEmpty() {
- return values.length == 0;
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
-
- /**
- * Gets the the number of unique values in this hasher.
- * @return the number of unique values.
+ * Constructs the StaticHasher from a StaticHasher and a Shape.
+ * @param hasher the StaticHasher to read.
+ * @param shape the Shape for the resulting values.
+ * @throws IllegalArgumentException if the shape of the hasher and the shape parameter are not the same.
*/
- public int size() {
- return values.length;
+ public StaticHasher(final StaticHasher hasher, final Shape shape) {
+ if (!hasher.shape.equals(shape)) {
+ throw new IllegalArgumentException(String.format("Hasher shape (%s) is not the same as shape (%s)",
+ hasher.getShape().toString(), shape.toString()));
+ }
+ this.shape = shape;
+ this.values = hasher.values;
}
/**
@@ -143,4 +116,31 @@ public final class StaticHasher implements Hasher {
}
return Arrays.stream( values ).iterator();
}
+
+ @Override
+ public HashFunctionIdentity getHashFunctionIdentity() {
+ return shape.getHashFunctionIdentity();
+ }
+
+ /**
+ * Gets the shape this static hasher was created with.
+ *
+ * @return the Shape of this hasher.
+ */
+ public Shape getShape() {
+ return shape;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return values.length == 0;
+ }
+
+ /**
+ * Gets the the number of unique values in this hasher.
+ * @return the number of unique values.
+ */
+ public int size() {
+ return values.length;
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
index 5a390a4..3e07521 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
@@ -33,6 +33,11 @@ import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
public final class MD5Cyclic implements HashFunction {
/**
+ * The name of this hash function.
+ */
+ public static final String NAME = "MD5";
+
+ /**
* The MD5 digest implementation.
*/
private final MessageDigest messageDigest;
@@ -48,11 +53,6 @@ public final class MD5Cyclic implements HashFunction {
private final long[] result = new long[2];
/**
- * The name of this hash function.
- */
- public static final String NAME = "MD5";
-
- /**
* Constructs the MD5 hashing function.
*/
public MD5Cyclic() {
@@ -90,23 +90,23 @@ public final class MD5Cyclic implements HashFunction {
}
@Override
- public String getProvider() {
- return "Apache Commons Collections";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collections";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return signature;
}
@Override
- public long getSignature() {
- return signature;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x86Cyclic.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x86Cyclic.java
index d922afa..9c9ed74 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x86Cyclic.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x86Cyclic.java
@@ -31,6 +31,11 @@ import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
*/
public final class Murmur128x86Cyclic implements HashFunction {
/**
+ * The name of this hash method.
+ */
+ public static final String NAME = "Murmur3_x64_128";
+
+ /**
* The result of the hash 0 call.
*/
private long[] parts = null;
@@ -41,11 +46,6 @@ public final class Murmur128x86Cyclic implements HashFunction {
private final long signature;
/**
- * The name of this hash method.
- */
- public static final String NAME = "Murmur3_x64_128";
-
- /**
* Constructs a Murmur3 x64 128 hash.
*/
public Murmur128x86Cyclic() {
@@ -69,23 +69,23 @@ public final class Murmur128x86Cyclic implements HashFunction {
}
@Override
- public String getProvider() {
- return "Apache Commons Collections";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collections";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return signature;
}
@Override
- public long getSignature() {
- return signature;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
index 868d4ea..c9c2120 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
@@ -32,14 +32,14 @@ import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
public final class Murmur32x86Iterative implements HashFunction {
/**
- * The signature for this hash function.
+ * The name of this hash function.
*/
- private final long signature;
+ public static final String NAME = "Murmur3_x86_32";
/**
- * The name of this hash function.
+ * The signature for this hash function.
*/
- public static final String NAME = "Murmur3_x86_32";
+ private final long signature;
/**
* Constructs a Murmur3 x86 32 hash
@@ -58,22 +58,22 @@ public final class Murmur32x86Iterative implements HashFunction {
return NAME;
}
@Override
- public String getProvider() {
- return "Apache Commons Collections";
+ public ProcessType getProcessType() {
+ return ProcessType.ITERATIVE;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collections";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.ITERATIVE;
+ public long getSignature() {
+ return signature;
}
@Override
- public long getSignature() {
- return signature;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
index 788c32a..47ea409 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
@@ -72,22 +72,22 @@ public final class ObjectsHashIterative implements HashFunction {
}
@Override
- public String getProvider() {
- return "Apache Commons Collections";
+ public ProcessType getProcessType() {
+ return ProcessType.ITERATIVE;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collections";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.ITERATIVE;
+ public long getSignature() {
+ return signature;
}
@Override
- public long getSignature() {
- return signature;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
index e66c3bd..9608373 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
@@ -39,15 +39,6 @@ import org.junit.Test;
public abstract class AbstractBloomFilterTest {
/**
- * Create the BloomFilter implementation we are testing.
- *
- * @param hasher the hasher to use to create the filter..
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- protected abstract AbstractBloomFilter createFilter(Hasher hasher, Shape shape);
-
- /**
* A HashFunctionIdentity for testing.
*/
protected HashFunctionIdentity testFunction = new HashFunctionIdentity() {
@@ -58,23 +49,23 @@ public abstract class AbstractBloomFilterTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
@@ -89,51 +80,96 @@ public abstract class AbstractBloomFilterTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 1;
}
@Override
- public long getSignature() {
- return 1;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
/**
- * Create an empty version of the BloomFilter implementation we are testing.
- *
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
+ * The shape of the Bloom filters for testing
*/
- protected abstract AbstractBloomFilter createEmptyFilter(Shape shape);
+ protected Shape shape = new Shape(testFunction, 3, 72, 17);
/**
- * The shape of the Bloom filters for testing
+ * Tests that the andCardinality calculations are correct.
*/
- protected Shape shape = new Shape(testFunction, 3, 72, 17);
+ @Test
+ public final void andCardinalityTest() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+
+ final BloomFilter bf = createFilter(hasher, shape);
+
+ final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+
+ final BloomFilter bf2 = createFilter(hasher2, shape);
+
+ assertEquals(7, bf.andCardinality(bf2));
+ }
/**
- * Tests that creating a filter with a hasher works as expected.
+ * Tests that the andCardinality calculations are correct when there are more than Long.LENGTH bits.
*/
@Test
- public final void constructorTest_Hasher() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ public final void andCardinalityTest_ExtraLongs() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
final BloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
- assertEquals(0x1FFFF, lb[0]);
- assertEquals(1, lb.length);
+
+ final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+
+ final BloomFilter bf2 = createFilter(hasher2, shape);
+
+ assertEquals(7, bf.andCardinality(bf2));
+ assertEquals(7, bf2.andCardinality(bf));
+ }
+
+ /**
+ * Compare 2 static hashers to verify they have the same bits enabled.
+ *
+ * @param hasher1 the first static hasher.
+ * @param hasher2 the second static hasher.
+ */
+ private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
+ final OfInt iter1 = hasher1.getBits(shape);
+ final OfInt iter2 = hasher2.getBits(shape);
+
+ while (iter1.hasNext()) {
+ assertTrue("Not enough data in second hasher", iter2.hasNext());
+ assertEquals(iter1.nextInt(), iter2.nextInt());
+ }
+ assertFalse("Too much data in second hasher", iter2.hasNext());
+ }
+
+ /**
+ * Tests that cardinality is correct.
+ */
+ @Test
+ public final void cardinalityTest() {
+
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+
+ final BloomFilter bf = createFilter(hasher, shape);
+ assertEquals(17, bf.cardinality());
}
/**
@@ -148,6 +184,20 @@ public abstract class AbstractBloomFilterTest {
}
/**
+ * Tests that creating a filter with a hasher works as expected.
+ */
+ @Test
+ public final void constructorTest_Hasher() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+
+ final BloomFilter bf = createFilter(hasher, shape);
+ final long[] lb = bf.getBits();
+ assertEquals(0x1FFFF, lb[0]);
+ assertEquals(1, lb.length);
+ }
+
+ /**
* Tests that creating a Bloom filter with a Static hasher that has one shape and a
* different specified shape fails.
*/
@@ -166,125 +216,156 @@ public abstract class AbstractBloomFilterTest {
}
/**
- * Tests that cardinality is correct.
+ * Tests that contains() with a Bloom filter argument returns the proper results.
*/
@Test
- public final void cardinalityTest() {
-
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ public final void containsTest_BloomFilter() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
final BloomFilter bf = createFilter(hasher, shape);
- assertEquals(17, bf.cardinality());
+
+ final List<Integer> lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf2 = createFilter(hasher2, shape);
+ assertTrue(bf.contains(bf2));
+ assertFalse(bf2.contains(bf));
}
/**
- * Tests that the orCardinality calculations are correct.
+ * Tests that contains() fails properly if the other Bloom filter is not of the proper shape.
*/
@Test
- public final void orCardinalityTest() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ public final void containsTest_BloomFilter_WrongShape() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter bf = createFilter(hasher, shape);
- final AbstractBloomFilter bf = createFilter(hasher, shape);
-
- final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
-
- final BloomFilter bf2 = createFilter(hasher2, shape);
-
- assertEquals(27, bf.orCardinality(bf2));
+ final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
+ final Hasher hasher2 = new StaticHasher(lst.iterator(), anotherShape);
+ final BloomFilter bf2 = createFilter(hasher2, anotherShape);
+ try {
+ bf.contains(bf2);
+ fail("Should throw IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
+ }
}
/**
- * Tests that the orCardinality calculations are correct when there are more than Long.LENGTH bits.
+ * Tests that contains() with a Hasher argument returns the proper results.
*/
@Test
- public final void orCardinalityTest_ExtraLongs() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ public final void containsTest_Hasher() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter bf = createFilter(hasher, shape);
- final AbstractBloomFilter bf = createFilter(hasher, shape);
-
- final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ List<Integer> lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
+ Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ assertTrue(bf.contains(hasher2));
- final AbstractBloomFilter bf2 = createFilter(hasher2, shape);
+ lst2 = Arrays.asList(17, 18, 19, 20);
+ hasher2 = new StaticHasher(lst2.iterator(), shape);
+ assertFalse(bf.contains(hasher2));
- assertEquals(27, bf.orCardinality(bf2));
- assertEquals(27, bf2.orCardinality(bf));
+ lst2 = Arrays.asList(10, 11, 12, 17, 18, 19, 20);
+ hasher2 = new StaticHasher(lst2.iterator(), shape);
+ assertFalse(bf.contains(hasher2));
}
/**
- * Tests that the andCardinality calculations are correct.
+ * Tests that contains() fails properly if the hasher is not of the proper shape.
*/
@Test
- public final void andCardinalityTest() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ public final void containsTest_Hasher_WrongShape() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
final BloomFilter bf = createFilter(hasher, shape);
- final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
-
- final BloomFilter bf2 = createFilter(hasher2, shape);
+ final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- assertEquals(7, bf.andCardinality(bf2));
+ final List<Integer> lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
+ try {
+ bf.contains(hasher2);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing
+ }
}
/**
- * Tests that the andCardinality calculations are correct when there are more than Long.LENGTH bits.
+ * Create an empty version of the BloomFilter implementation we are testing.
+ *
+ * @param shape the shape of the filter.
+ * @return a BloomFilter implementation.
*/
- @Test
- public final void andCardinalityTest_ExtraLongs() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
-
- final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ protected abstract AbstractBloomFilter createEmptyFilter(Shape shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
+ /**
+ * Create the BloomFilter implementation we are testing.
+ *
+ * @param hasher the hasher to use to create the filter..
+ * @param shape the shape of the filter.
+ * @return a BloomFilter implementation.
+ */
+ protected abstract AbstractBloomFilter createFilter(Hasher hasher, Shape shape);
- assertEquals(7, bf.andCardinality(bf2));
- assertEquals(7, bf2.andCardinality(bf));
+ /**
+ * Tests that getBits() works correctly when multiple long values are returned.
+ */
+ @Test
+ public final void getBitsTest_SpanLong() {
+ final List<Integer> lst = Arrays.asList(63, 64);
+ final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter bf = createFilter(hasher, shape);
+ final long[] lb = bf.getBits();
+ assertEquals(2, lb.length);
+ assertEquals(0x8000000000000000L, lb[0]);
+ assertEquals(0x1, lb[1]);
}
/**
- * Tests that the zorCardinality calculations are correct.
+ * Tests that the the hasher returned from getHasher() works correctly.
*/
@Test
- public final void xorCardinalityTest() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
+ public final void getHasherTest() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
final BloomFilter bf = createFilter(hasher, shape);
- final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
+ final StaticHasher hasher2 = bf.getHasher();
- assertEquals(20, bf.xorCardinality(bf2));
+ assertEquals(shape, hasher2.getShape());
+ assertSameBits(hasher, hasher2);
}
/**
- * Tests that the xorCardinality calculations are correct when there are more than Long.LENGTH bits.
+ * Tests that isFull() returns the proper values.
*/
@Test
- public final void xorCardinalityTest_ExtraLongs() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ public final void isFullTest() {
- final BloomFilter bf = createFilter(hasher, shape);
+ // create empty filter
+ AbstractBloomFilter filter = createEmptyFilter(shape);
+ assertFalse(filter.isFull());
- final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
+ final List<Integer> values = new ArrayList<>(shape.getNumberOfBits());
+ for (int i = 0; i < shape.getNumberOfBits(); i++) {
+ values.add(i);
+ }
+
+ StaticHasher hasher2 = new StaticHasher(values.iterator(), shape);
+ filter = createFilter(hasher2, shape);
+
+ assertTrue(filter.isFull());
+
+ final int mid = shape.getNumberOfBits() / 2;
+ values.remove(Integer.valueOf(mid));
+ hasher2 = new StaticHasher(values.iterator(), shape);
+ filter = createFilter(hasher2, shape);
+ assertFalse(filter.isFull());
- assertEquals(20, bf.xorCardinality(bf2));
- assertEquals(20, bf2.xorCardinality(bf));
}
/**
@@ -368,156 +449,75 @@ public abstract class AbstractBloomFilterTest {
}
/**
- * Tests that isFull() returns the proper values.
+ * Tests that the orCardinality calculations are correct.
*/
@Test
- public final void isFullTest() {
-
- // create empty filter
- AbstractBloomFilter filter = createEmptyFilter(shape);
- assertFalse(filter.isFull());
-
- final List<Integer> values = new ArrayList<>(shape.getNumberOfBits());
- for (int i = 0; i < shape.getNumberOfBits(); i++) {
- values.add(i);
- }
+ public final void orCardinalityTest() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- StaticHasher hasher2 = new StaticHasher(values.iterator(), shape);
- filter = createFilter(hasher2, shape);
+ final AbstractBloomFilter bf = createFilter(hasher, shape);
- assertTrue(filter.isFull());
+ final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final int mid = shape.getNumberOfBits() / 2;
- values.remove(Integer.valueOf(mid));
- hasher2 = new StaticHasher(values.iterator(), shape);
- filter = createFilter(hasher2, shape);
- assertFalse(filter.isFull());
+ final BloomFilter bf2 = createFilter(hasher2, shape);
+ assertEquals(27, bf.orCardinality(bf2));
}
/**
- * Tests that contains() fails properly if the other Bloom filter is not of the proper shape.
+ * Tests that the orCardinality calculations are correct when there are more than Long.LENGTH bits.
*/
@Test
- public final void containsTest_BloomFilter_WrongShape() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ public final void orCardinalityTest_ExtraLongs() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), anotherShape);
- final BloomFilter bf2 = createFilter(hasher2, anotherShape);
- try {
- bf.contains(bf2);
- fail("Should throw IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
- /**
- * Tests that contains() with a Bloom filter argument returns the proper results.
- */
- @Test
- public final void containsTest_BloomFilter() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
+ final AbstractBloomFilter bf = createFilter(hasher, shape);
- final List<Integer> lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
+ final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
- assertTrue(bf.contains(bf2));
- assertFalse(bf2.contains(bf));
- }
-
- /**
- * Tests that contains() with a Hasher argument returns the proper results.
- */
- @Test
- public final void containsTest_Hasher() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
- List<Integer> lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertTrue(bf.contains(hasher2));
-
- lst2 = Arrays.asList(17, 18, 19, 20);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertFalse(bf.contains(hasher2));
+ final AbstractBloomFilter bf2 = createFilter(hasher2, shape);
- lst2 = Arrays.asList(10, 11, 12, 17, 18, 19, 20);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertFalse(bf.contains(hasher2));
+ assertEquals(27, bf.orCardinality(bf2));
+ assertEquals(27, bf2.orCardinality(bf));
}
/**
- * Tests that contains() fails properly if the hasher is not of the proper shape.
+ * Tests that the zorCardinality calculations are correct.
*/
@Test
- public final void containsTest_Hasher_WrongShape() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ public final void xorCardinalityTest() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final List<Integer> lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
- try {
- bf.contains(hasher2);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
+ final BloomFilter bf = createFilter(hasher, shape);
- /**
- * Compare 2 static hashers to verify they have the same bits enabled.
- *
- * @param hasher1 the first static hasher.
- * @param hasher2 the second static hasher.
- */
- private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
- final OfInt iter1 = hasher1.getBits(shape);
- final OfInt iter2 = hasher2.getBits(shape);
+ final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf2 = createFilter(hasher2, shape);
- while (iter1.hasNext()) {
- assertTrue("Not enough data in second hasher", iter2.hasNext());
- assertEquals(iter1.nextInt(), iter2.nextInt());
- }
- assertFalse("Too much data in second hasher", iter2.hasNext());
+ assertEquals(20, bf.xorCardinality(bf2));
}
/**
- * Tests that the the hasher returned from getHasher() works correctly.
+ * Tests that the xorCardinality calculations are correct when there are more than Long.LENGTH bits.
*/
@Test
- public final void getHasherTest() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
+ public final void xorCardinalityTest_ExtraLongs() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final StaticHasher hasher2 = bf.getHasher();
+ final BloomFilter bf = createFilter(hasher, shape);
- assertEquals(shape, hasher2.getShape());
- assertSameBits(hasher, hasher2);
- }
+ final List<Integer> lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
+ final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf2 = createFilter(hasher2, shape);
- /**
- * Tests that getBits() works correctly when multiple long values are returned.
- */
- @Test
- public final void getBitsTest_SpanLong() {
- final List<Integer> lst = Arrays.asList(63, 64);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
- assertEquals(2, lb.length);
- assertEquals(0x8000000000000000L, lb[0]);
- assertEquals(0x1, lb[1]);
+ assertEquals(20, bf.xorCardinality(bf2));
+ assertEquals(20, bf2.xorCardinality(bf));
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
index 8939d88..3c3230f 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
@@ -32,16 +32,6 @@ import org.junit.Test;
*/
public class BitSetBloomFilterTest extends AbstractBloomFilterTest {
- @Override
- protected BitSetBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new BitSetBloomFilter( hasher, shape );
- }
-
- @Override
- protected BitSetBloomFilter createEmptyFilter(final Shape shape) {
- return new BitSetBloomFilter( shape );
- }
-
/**
* Test that andCardinality works for BitSetBloomFilter arguments.
*/
@@ -71,6 +61,38 @@ public class BitSetBloomFilterTest extends AbstractBloomFilterTest {
}
+ @Override
+ protected BitSetBloomFilter createEmptyFilter(final Shape shape) {
+ return new BitSetBloomFilter( shape );
+ }
+
+ @Override
+ protected BitSetBloomFilter createFilter(final Hasher hasher, final Shape shape) {
+ return new BitSetBloomFilter( hasher, shape );
+ }
+
+ /**
+ * Test that merge() works for BitSetBloomFilter arguments.
+ */
+ @Test
+ public void mergeTest_BitSetBloomFilter() {
+
+ final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
+ final Hasher hasher = new StaticHasher( lst.iterator(), shape );
+
+ final BitSetBloomFilter bf = createFilter(hasher, shape);
+
+ final List<Integer> lst2 = Arrays.asList( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ,26 ,27 );
+ final Hasher hasher2 = new StaticHasher( lst2.iterator(), shape );
+ final BloomFilter bf2 = new BitSetBloomFilter(hasher2, shape);
+
+ bf.merge(bf2);
+
+ assertEquals(27, bf.cardinality());
+
+
+ }
+
/**
* Test that xorCardinality works for BitSetBloomFilter arguments.
*/
@@ -100,27 +122,5 @@ public class BitSetBloomFilterTest extends AbstractBloomFilterTest {
}
- /**
- * Test that merge() works for BitSetBloomFilter arguments.
- */
- @Test
- public void mergeTest_BitSetBloomFilter() {
-
- final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
- final Hasher hasher = new StaticHasher( lst.iterator(), shape );
-
- final BitSetBloomFilter bf = createFilter(hasher, shape);
-
- final List<Integer> lst2 = Arrays.asList( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ,26 ,27 );
- final Hasher hasher2 = new StaticHasher( lst2.iterator(), shape );
- final BloomFilter bf2 = new BitSetBloomFilter(hasher2, shape);
-
- bf.merge(bf2);
-
- assertEquals(27, bf.cardinality());
-
-
- }
-
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java
index ec66404..853b383 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilterTest.java
@@ -38,14 +38,34 @@ import org.junit.Test;
public class CountingBloomFilterTest extends AbstractBloomFilterTest {
- @Override
- protected CountingBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new CountingBloomFilter( hasher, shape );
- }
+ /**
+ * Tests that the andCardinality calculation executes correctly when using a
+ * CountingBloomFilter argument.
+ */
+ @Test
+ public void andCardinalityTest_CountingBloomFilter() {
+ final Hasher hasher = new StaticHasher( Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ).iterator(), shape );
+
+ final CountingBloomFilter bf = createFilter(hasher, shape);
+
+ Hasher hasher2 = new StaticHasher( Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ).iterator(), shape );
+ CountingBloomFilter bf2 = createFilter(hasher2, shape);
+
+ assertEquals( 10, bf.andCardinality(bf2));
+ assertEquals( 10, bf2.andCardinality(bf));
+
+ hasher2 = new StaticHasher( Arrays.asList( 1, 2, 3, 4, 5 ).iterator(), shape );
+ bf2 = createFilter(hasher2, shape);
+
+ assertEquals( 5, bf.andCardinality(bf2));
+ assertEquals( 5, bf2.andCardinality(bf));
+
+ hasher2 = new StaticHasher( Arrays.asList( 11, 12, 13, 14, 15 ).iterator(), shape );
+ bf2 = createFilter(hasher2, shape);
+ assertEquals( 0, bf.andCardinality(bf2));
+ assertEquals( 0, bf2.andCardinality(bf));
+
- @Override
- protected CountingBloomFilter createEmptyFilter(final Shape shape) {
- return new CountingBloomFilter( shape );
}
/**
@@ -112,6 +132,17 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
+ @Override
+ protected CountingBloomFilter createEmptyFilter(final Shape shape) {
+ return new CountingBloomFilter( shape );
+ }
+
+ @Override
+ protected CountingBloomFilter createFilter(final Hasher hasher, final Shape shape) {
+ return new CountingBloomFilter( hasher, shape );
+ }
+
+
/**
* Tests that merge correctly updates the counts when a CountingBloomFilter is passed
*/
@@ -151,7 +182,6 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
}
-
/**
* Test that merge correctly updates the counts when a BitSetBloomFilter is passed
*/
@@ -193,6 +223,41 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
/**
+ * Test that merge correctly updates the counts when a CountingBloomFilter is passed and an integer overflow occurs.
+ */
+ @Test
+ public void mergeTest_Overflow() {
+
+ final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
+ final Hasher hasher = new StaticHasher( lst.iterator(), shape );
+
+ CountingBloomFilter bf = createFilter(hasher, shape);
+
+
+ final Map<Integer,Integer> map = new HashMap<>();
+ bf.getCounts().forEach( e -> map.put( e.getKey(), e.getValue()));
+ map.put(1, Integer.MAX_VALUE );
+
+ CountingBloomFilter bf2 = new CountingBloomFilter(map, shape);
+
+ // should not fail
+ bf.merge(bf2);
+
+ // try max int on other side of merge.
+ bf2 = createFilter(hasher, shape);
+ bf = new CountingBloomFilter(map, shape);
+
+ try {
+ bf.merge(bf2);
+ fail( "Should have thrown IllegalStateException");
+ }
+ catch (final IllegalStateException expected)
+ {
+ // do nothing
+ }
+ }
+
+ /**
* Test that merge correctly updates the counts when a Hasher is passed
*/
@Test
@@ -232,45 +297,45 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
/**
- * Test that merge correctly updates the counts when a CountingBloomFilter is passed and an integer overflow occurs.
+ * Tests that when removing a counting Bloom filter the counts are correctly updated.
*/
@Test
- public void mergeTest_Overflow() {
-
- final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
- final Hasher hasher = new StaticHasher( lst.iterator(), shape );
-
- CountingBloomFilter bf = createFilter(hasher, shape);
-
-
+ public void removeTest_Counting() {
+ final int[] values = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1
+ };
final Map<Integer,Integer> map = new HashMap<>();
- bf.getCounts().forEach( e -> map.put( e.getKey(), e.getValue()));
- map.put(1, Integer.MAX_VALUE );
+ for (int i=1;i<values.length;i++)
+ {
+ map.put( i, values[i] );
+ }
- CountingBloomFilter bf2 = new CountingBloomFilter(map, shape);
+ final CountingBloomFilter bf = new CountingBloomFilter( map, shape );
- // should not fail
- bf.merge(bf2);
+ final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
+ final Hasher hasher = new StaticHasher( lst.iterator(), shape );
+ final BloomFilter bf2 = new CountingBloomFilter( hasher, shape );
- // try max int on other side of merge.
- bf2 = createFilter(hasher, shape);
- bf = new CountingBloomFilter(map, shape);
+ bf.remove( bf2 );
+ assertEquals( 17, bf.cardinality() );
+ final Map<Integer,Integer> map2 = new HashMap<>();
+ bf.getCounts().forEach( e -> map2.put( e.getKey(), e.getValue()));
- try {
- bf.merge(bf2);
- fail( "Should have thrown IllegalStateException");
- }
- catch (final IllegalStateException expected)
+ for (int i = 11; i<values.length; i++ )
{
- // do nothing
+ assertNotNull( map2.get(i) );
+ assertEquals( 1, map2.get(i).intValue());
}
+
}
/**
- * Tests that when removing a standard Bloom filter the counts are correctly updated.
+ * Tests that removing a hasher update the counts properly.
*/
@Test
- public void removeTest_Standard() {
+ public void removeTest_Hasher() {
final int[] values = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
@@ -286,9 +351,9 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
final Hasher hasher = new StaticHasher( lst.iterator(), shape );
- final BitSetBloomFilter bf2 = new BitSetBloomFilter( hasher, shape );
- bf.remove( bf2 );
+
+ bf.remove( hasher );
assertEquals( 17, bf.cardinality() );
final Map<Integer,Integer> map2 = new HashMap<>();
bf.getCounts().forEach( e -> map2.put( e.getKey(), e.getValue()));
@@ -302,10 +367,10 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
/**
- * Tests that when removing a counting Bloom filter the counts are correctly updated.
+ * Tests that when removing a standard Bloom filter the counts are correctly updated.
*/
@Test
- public void removeTest_Counting() {
+ public void removeTest_Standard() {
final int[] values = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
@@ -321,7 +386,7 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
final Hasher hasher = new StaticHasher( lst.iterator(), shape );
- final BloomFilter bf2 = new CountingBloomFilter( hasher, shape );
+ final BitSetBloomFilter bf2 = new BitSetBloomFilter( hasher, shape );
bf.remove( bf2 );
assertEquals( 17, bf.cardinality() );
@@ -371,69 +436,4 @@ public class CountingBloomFilterTest extends AbstractBloomFilterTest {
}
}
- /**
- * Tests that removing a hasher update the counts properly.
- */
- @Test
- public void removeTest_Hasher() {
- final int[] values = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1
- };
- final Map<Integer,Integer> map = new HashMap<>();
- for (int i=1;i<values.length;i++)
- {
- map.put( i, values[i] );
- }
-
- final CountingBloomFilter bf = new CountingBloomFilter( map, shape );
-
- final List<Integer> lst = Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ,16 ,17 );
- final Hasher hasher = new StaticHasher( lst.iterator(), shape );
-
-
- bf.remove( hasher );
- assertEquals( 17, bf.cardinality() );
- final Map<Integer,Integer> map2 = new HashMap<>();
- bf.getCounts().forEach( e -> map2.put( e.getKey(), e.getValue()));
-
- for (int i = 11; i<values.length; i++ )
- {
- assertNotNull( map2.get(i) );
- assertEquals( 1, map2.get(i).intValue());
- }
-
- }
-
- /**
- * Tests that the andCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void andCardinalityTest_CountingBloomFilter() {
- final Hasher hasher = new StaticHasher( Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ).iterator(), shape );
-
- final CountingBloomFilter bf = createFilter(hasher, shape);
-
- Hasher hasher2 = new StaticHasher( Arrays.asList( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ).iterator(), shape );
- CountingBloomFilter bf2 = createFilter(hasher2, shape);
-
- assertEquals( 10, bf.andCardinality(bf2));
- assertEquals( 10, bf2.andCardinality(bf));
-
- hasher2 = new StaticHasher( Arrays.asList( 1, 2, 3, 4, 5 ).iterator(), shape );
- bf2 = createFilter(hasher2, shape);
-
- assertEquals( 5, bf.andCardinality(bf2));
- assertEquals( 5, bf2.andCardinality(bf));
-
- hasher2 = new StaticHasher( Arrays.asList( 11, 12, 13, 14, 15 ).iterator(), shape );
- bf2 = createFilter(hasher2, shape);
- assertEquals( 0, bf.andCardinality(bf2));
- assertEquals( 0, bf2.andCardinality(bf));
-
-
- }
-
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
index 46e4d24..f7de15e 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
@@ -30,16 +30,6 @@ import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
*/
public class DefaultBloomFilterMethodsTest extends AbstractBloomFilterTest {
- @Override
- protected AbstractBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new BF( hasher, shape );
- }
-
- @Override
- protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
- return new BF( shape );
- }
-
/**
* A testing class that implements only the abstract methods from BloomFilter.
*
@@ -97,4 +87,14 @@ public class DefaultBloomFilterMethodsTest extends AbstractBloomFilterTest {
}
+ @Override
+ protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
+ return new BF( shape );
+ }
+
+ @Override
+ protected AbstractBloomFilter createFilter(final Hasher hasher, final Shape shape) {
+ return new BF( hasher, shape );
+ }
+
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
index d7d6ad8..82a0a2a 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
@@ -33,16 +33,6 @@ import org.junit.Test;
public class HasherBloomFilterTest extends AbstractBloomFilterTest {
- @Override
- protected HasherBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new HasherBloomFilter( hasher, shape );
- }
-
- @Override
- protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
- return new HasherBloomFilter( shape );
- }
-
/**
* Tests that the constructor works correctly.
* @throws NoSuchAlgorithmException
@@ -58,5 +48,15 @@ public class HasherBloomFilterTest extends AbstractBloomFilterTest {
assertEquals( 0x60L, lb[1]);
}
+ @Override
+ protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
+ return new HasherBloomFilter( shape );
+ }
+
+ @Override
+ protected HasherBloomFilter createFilter(final Hasher hasher, final Shape shape) {
+ return new HasherBloomFilter( hasher, shape );
+ }
+
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
index d92f362..27d04f4 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
@@ -40,72 +40,126 @@ public class SetOperationsTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
private final Shape shape = new Shape(testFunction, 3, 72, 17);
/**
- * Tests that the size estimate is correctly calculated.
+ * Tests that the Cosine similarity is correctly calculated.
*/
@Test
- public final void estimateSizeTest() {
- // build a filter
- List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ public final void cosineDistanceTest() {
+ List<Integer> lst = Arrays.asList(1, 2);
Hasher hasher = new StaticHasher(lst.iterator(), shape);
BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
- assertEquals(1, SetOperations.estimateSize(filter1));
- // the data provided above do not generate an estimate that is equivalent to the
- // actual.
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
+ List<Integer> lst2 = Arrays.asList(2, 3);
+ Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+
+ assertEquals(0.5, SetOperations.cosineDistance(filter1, filter2), 0.0001);
+ assertEquals(0.5, SetOperations.cosineDistance(filter2, filter1), 0.0001);
+
+ lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
hasher = new StaticHasher(lst.iterator(), shape);
filter1 = new HasherBloomFilter(hasher, shape);
- assertEquals(1, SetOperations.estimateSize(filter1));
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
- 26, 27, 28, 29, 30, 31, 32, 33);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ hasher2 = new StaticHasher(lst2.iterator(), shape);
+ filter2 = new HasherBloomFilter(hasher2, shape);
- assertEquals(3, SetOperations.estimateSize(filter2));
+ assertEquals(0.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
+ assertEquals(0.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
+
+ lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
+ hasher2 = new StaticHasher(lst2.iterator(), shape);
+ filter2 = new HasherBloomFilter(hasher2, shape);
+
+ assertEquals(0.514928749927334, SetOperations.cosineDistance(filter1, filter2), 0.000000000000001);
+ assertEquals(0.514928749927334, SetOperations.cosineDistance(filter2, filter1), 0.000000000000001);
}
/**
- * Tests that the union size estimate is correctly calculated.
+ * Tests that the Cosine distance is correctly calculated when one or
+ * both filters are empty
*/
@Test
- public final void estimateUnionSizeTest() {
+ public final void cosineDistanceTest_NoValues() {
+ final BloomFilter filter1 = new HasherBloomFilter(shape);
+ final BloomFilter filter2 = new HasherBloomFilter(shape);
// build a filter
- List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter filter3 = new HasherBloomFilter( hasher, shape );
+
+ assertEquals(1.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
+ assertEquals(1.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
+ assertEquals(1.0, SetOperations.cosineDistance(filter1, filter3), 0.0001);
+ assertEquals(1.0, SetOperations.cosineDistance(filter3, filter1), 0.0001);
+
+ }
+
+ /**
+ * Tests that the Cosine similarity is correctly calculated.
+ */
+ @Test
+ public final void cosineSimilarityTest() {
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
- lst = Arrays.asList(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ List<Integer> lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
- final long estimate = SetOperations.estimateUnionSize(filter1, filter2);
- assertEquals(3, estimate);
+ assertEquals(1.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
+ assertEquals(1.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
+
+ lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
+ hasher2 = new StaticHasher(lst2.iterator(), shape);
+ filter2 = new HasherBloomFilter(hasher2, shape);
+
+ assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter1, filter2), 0.000000000000001);
+ assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter2, filter1), 0.000000000000001);
+
+ }
+
+ /**
+ * Tests that the Cosine similarity is correctly calculated when one or
+ * both filters are empty
+ */
+ @Test
+ public final void cosineSimilarityTest_NoValues() {
+ final BloomFilter filter1 = new HasherBloomFilter(shape);
+ final BloomFilter filter2 = new HasherBloomFilter(shape);
+ // build a filter
+ final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter filter3 = new HasherBloomFilter( hasher, shape );
+
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter3), 0.0001);
+ assertEquals(0.0, SetOperations.cosineSimilarity(filter3, filter1), 0.0001);
}
@@ -130,35 +184,58 @@ public class SetOperationsTest {
}
/**
- * Tests that the Hamming distance is correctly calculated.
+ * Tests that the size estimate is correctly calculated.
*/
@Test
- public final void hammingDistanceTest() {
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
+ public final void estimateSizeTest() {
+ // build a filter
+ List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
+ assertEquals(1, SetOperations.estimateSize(filter1));
- List<Integer> lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ // the data provided above do not generate an estimate that is equivalent to the
+ // actual.
+ lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
+ hasher = new StaticHasher(lst.iterator(), shape);
+ filter1 = new HasherBloomFilter(hasher, shape);
+ assertEquals(1, SetOperations.estimateSize(filter1));
- assertEquals(0, SetOperations.hammingDistance(filter1, filter2));
- assertEquals(0, SetOperations.hammingDistance(filter2, filter1));
+ lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33);
+ final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ assertEquals(3, SetOperations.estimateSize(filter2));
- assertEquals(17, SetOperations.hammingDistance(filter1, filter2));
- assertEquals(17, SetOperations.hammingDistance(filter2, filter1));
+ }
+
+
+ /**
+ * Tests that the union size estimate is correctly calculated.
+ */
+ @Test
+ public final void estimateUnionSizeTest() {
+ // build a filter
+ List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
+
+ lst = Arrays.asList(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40);
+ final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+
+ final long estimate = SetOperations.estimateUnionSize(filter1, filter2);
+ assertEquals(3, estimate);
}
/**
- * Tests that the Jaccard similarity is correctly calculated.
+ * Tests that the Hamming distance is correctly calculated.
*/
@Test
- public final void jaccardSimilarityTest() {
+ public final void hammingDistanceTest() {
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
@@ -167,34 +244,15 @@ public class SetOperationsTest {
Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
+ assertEquals(0, SetOperations.hammingDistance(filter1, filter2));
+ assertEquals(0, SetOperations.hammingDistance(filter2, filter1));
lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
hasher2 = new StaticHasher(lst2.iterator(), shape);
filter2 = new HasherBloomFilter(hasher2, shape);
- assertEquals(0.68, SetOperations.jaccardSimilarity(filter1, filter2), 0.001);
- assertEquals(0.68, SetOperations.jaccardSimilarity(filter2, filter1), 0.001);
- }
-
- /**
- * Tests that the Jaccard similarity is correctly calculated when one or
- * both filters are empty
- */
- @Test
- public final void jaccardSimilarityTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter( hasher, shape );
-
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
- assertEquals(1.0, SetOperations.jaccardSimilarity(filter1, filter3), 0.0001);
- assertEquals(1.0, SetOperations.jaccardSimilarity(filter3, filter1), 0.0001);
+ assertEquals(17, SetOperations.hammingDistance(filter1, filter2));
+ assertEquals(17, SetOperations.hammingDistance(filter2, filter1));
}
@@ -244,12 +302,11 @@ public class SetOperationsTest {
}
-
/**
- * Tests that the Cosine similarity is correctly calculated.
+ * Tests that the Jaccard similarity is correctly calculated.
*/
@Test
- public final void cosineSimilarityTest() {
+ public final void jaccardSimilarityTest() {
final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
@@ -258,80 +315,23 @@ public class SetOperationsTest {
Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
- assertEquals(1.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
- assertEquals(1.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
-
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter1, filter2), 0.000000000000001);
- assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter2, filter1), 0.000000000000001);
-
- }
-
- /**
- * Tests that the Cosine similarity is correctly calculated when one or
- * both filters are empty
- */
- @Test
- public final void cosineSimilarityTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List<Integer> lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter( hasher, shape );
-
- assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
- assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter3), 0.0001);
- assertEquals(0.0, SetOperations.cosineSimilarity(filter3, filter1), 0.0001);
-
- }
-
- /**
- * Tests that the Cosine similarity is correctly calculated.
- */
- @Test
- public final void cosineDistanceTest() {
- List<Integer> lst = Arrays.asList(1, 2);
- Hasher hasher = new StaticHasher(lst.iterator(), shape);
- BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List<Integer> lst2 = Arrays.asList(2, 3);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.5, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(0.5, SetOperations.cosineDistance(filter2, filter1), 0.0001);
-
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- hasher = new StaticHasher(lst.iterator(), shape);
- filter1 = new HasherBloomFilter(hasher, shape);
-
- lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(0.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
+ assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
+ assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
hasher2 = new StaticHasher(lst2.iterator(), shape);
filter2 = new HasherBloomFilter(hasher2, shape);
- assertEquals(0.514928749927334, SetOperations.cosineDistance(filter1, filter2), 0.000000000000001);
- assertEquals(0.514928749927334, SetOperations.cosineDistance(filter2, filter1), 0.000000000000001);
-
+ assertEquals(0.68, SetOperations.jaccardSimilarity(filter1, filter2), 0.001);
+ assertEquals(0.68, SetOperations.jaccardSimilarity(filter2, filter1), 0.001);
}
/**
- * Tests that the Cosine distance is correctly calculated when one or
+ * Tests that the Jaccard similarity is correctly calculated when one or
* both filters are empty
*/
@Test
- public final void cosineDistanceTest_NoValues() {
+ public final void jaccardSimilarityTest_NoValues() {
final BloomFilter filter1 = new HasherBloomFilter(shape);
final BloomFilter filter2 = new HasherBloomFilter(shape);
// build a filter
@@ -339,10 +339,10 @@ public class SetOperationsTest {
final Hasher hasher = new StaticHasher(lst.iterator(), shape);
final BloomFilter filter3 = new HasherBloomFilter( hasher, shape );
- assertEquals(1.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter1, filter3), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter3, filter1), 0.0001);
+ assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
+ assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
+ assertEquals(1.0, SetOperations.jaccardSimilarity(filter1, filter3), 0.0001);
+ assertEquals(1.0, SetOperations.jaccardSimilarity(filter3, filter1), 0.0001);
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/CommonComparatorTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/CommonComparatorTest.java
index 4a703f7..3c326d7 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/CommonComparatorTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/CommonComparatorTest.java
@@ -35,28 +35,12 @@ import org.junit.Test;
*/
public class CommonComparatorTest {
- private void assertBefore(final HashFunctionIdentity identity1, final HashFunctionIdentity identity2) {
- assertTrue(0 > HashFunctionIdentity.COMMON_COMPARATOR.compare(identity1, identity2));
- }
-
private void assertAfter(final HashFunctionIdentity identity1, final HashFunctionIdentity identity2) {
assertTrue(0 < HashFunctionIdentity.COMMON_COMPARATOR.compare(identity1, identity2));
}
- /**
- * Tests the name ordering.
- */
- @Test
- public void nameOrderTestDifferentNames() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl2", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertBefore(impl1, impl2);
- assertEquals(0, HashFunctionIdentity.COMMON_COMPARATOR.compare(impl1, impl1));
- assertEquals(0, HashFunctionIdentity.COMMON_COMPARATOR.compare(impl2, impl2));
- assertAfter(impl2, impl1);
+ private void assertBefore(final HashFunctionIdentity identity1, final HashFunctionIdentity identity2) {
+ assertTrue(0 > HashFunctionIdentity.COMMON_COMPARATOR.compare(identity1, identity2));
}
/**
@@ -74,13 +58,13 @@ public class CommonComparatorTest {
}
/**
- * Tests that signedness ordering is correct.
+ * Tests the name ordering.
*/
@Test
- public void signednessOrder() {
+ public void nameOrderTestDifferentNames() {
final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
+ final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl2", Signedness.SIGNED,
ProcessType.CYCLIC, 300L);
assertBefore(impl1, impl2);
@@ -119,6 +103,22 @@ public class CommonComparatorTest {
}
/**
+ * Tests that signedness ordering is correct.
+ */
+ @Test
+ public void signednessOrder() {
+ final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
+ ProcessType.CYCLIC, 300L);
+ final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
+ ProcessType.CYCLIC, 300L);
+
+ assertBefore(impl1, impl2);
+ assertEquals(0, HashFunctionIdentity.COMMON_COMPARATOR.compare(impl1, impl1));
+ assertEquals(0, HashFunctionIdentity.COMMON_COMPARATOR.compare(impl2, impl2));
+ assertAfter(impl2, impl1);
+ }
+
+ /**
* Tests that the ordering is correct when applied ot a collection.
*/
@Test
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DeepComparatorTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DeepComparatorTest.java
index 1394c7b..e8649fd 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DeepComparatorTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DeepComparatorTest.java
@@ -35,28 +35,12 @@ import org.junit.Test;
*/
public class DeepComparatorTest {
- private void assertBefore(final HashFunctionIdentity identity1, final HashFunctionIdentity identity2) {
- assertTrue(0 > HashFunctionIdentity.DEEP_COMPARATOR.compare(identity1, identity2));
- }
-
private void assertAfter(final HashFunctionIdentity identity1, final HashFunctionIdentity identity2) {
assertTrue(0 < HashFunctionIdentity.DEEP_COMPARATOR.compare(identity1, identity2));
}
- /**
- * Tests that name order is correct.
- */
- @Test
- public void nameOrderTestDifferentNames() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl2", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertBefore(impl1, impl2);
- assertEquals(0, HashFunctionIdentity.DEEP_COMPARATOR.compare(impl1, impl1));
- assertEquals(0, HashFunctionIdentity.DEEP_COMPARATOR.compare(impl2, impl2));
- assertAfter(impl2, impl1);
+ private void assertBefore(final HashFunctionIdentity identity1, final HashFunctionIdentity identity2) {
+ assertTrue(0 > HashFunctionIdentity.DEEP_COMPARATOR.compare(identity1, identity2));
}
/**
@@ -74,13 +58,13 @@ public class DeepComparatorTest {
}
/**
- * Tests that signedness order is correct.
+ * Tests that name order is correct.
*/
@Test
- public void signednessOrder() {
+ public void nameOrderTestDifferentNames() {
final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
+ final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl2", Signedness.SIGNED,
ProcessType.CYCLIC, 300L);
assertBefore(impl1, impl2);
@@ -122,6 +106,22 @@ public class DeepComparatorTest {
}
/**
+ * Tests that signedness order is correct.
+ */
+ @Test
+ public void signednessOrder() {
+ final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
+ ProcessType.CYCLIC, 300L);
+ final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
+ ProcessType.CYCLIC, 300L);
+
+ assertBefore(impl1, impl2);
+ assertEquals(0, HashFunctionIdentity.DEEP_COMPARATOR.compare(impl1, impl1));
+ assertEquals(0, HashFunctionIdentity.DEEP_COMPARATOR.compare(impl2, impl2));
+ assertAfter(impl2, impl1);
+ }
+
+ /**
* Tests that the ordering is correct when applied ot a collection.
*/
@Test
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
index 0c7132e..e0552fe 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
@@ -38,16 +38,6 @@ public class DynamicHasherBuilderTest {
private final Shape shape = new Shape( new MD5Cyclic(), 1, Integer.MAX_VALUE, 1 );
/**
- * Sets up the builder for testing.
- * @throws NoSuchAlgorithmException if MD5 is not available.
- */
- @Before
- public void setup() throws NoSuchAlgorithmException
- {
- builder = new DynamicHasher.Builder( new MD5Cyclic());
- }
-
- /**
* Tests that hashing a byte works as expected.
*/
@Test
@@ -80,6 +70,18 @@ public class DynamicHasherBuilderTest {
}
/**
+ * Tests that an empty hasher works as expected.
+ */
+ @Test
+ public void buildTest_Empty() {
+ final DynamicHasher hasher = builder.build();
+
+ final OfInt iter = hasher.getBits(shape);
+
+ assertFalse(iter.hasNext());
+ }
+
+ /**
* Tests that hashing a string works as expected.
*/
@Test
@@ -95,14 +97,12 @@ public class DynamicHasherBuilderTest {
}
/**
- * Tests that an empty hasher works as expected.
+ * Sets up the builder for testing.
+ * @throws NoSuchAlgorithmException if MD5 is not available.
*/
- @Test
- public void buildTest_Empty() {
- final DynamicHasher hasher = builder.build();
-
- final OfInt iter = hasher.getBits(shape);
-
- assertFalse(iter.hasNext());
+ @Before
+ public void setup() throws NoSuchAlgorithmException
+ {
+ builder = new DynamicHasher.Builder( new MD5Cyclic());
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
index 64ed1f7..acddcec 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
@@ -45,23 +45,23 @@ public class DynamicHasherTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
index 254339b..82a148d 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
@@ -42,23 +42,23 @@ public class HashFunctionIdentityImplTest {
}
@Override
- public String getProvider() {
- return "Provider";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Provider";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return -1l;
}
@Override
- public long getSignature() {
- return -1l;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
index 8d863d4..f438aa5 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
@@ -42,23 +42,23 @@ public class ShapeTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}};
/*
@@ -77,161 +77,137 @@ public class ShapeTest {
private final Shape shape = new Shape(testFunction, 5, 0.1);
/**
- * Tests that the constructor with a null name, number of items, and probability fails.
+ * Tests that if the number of bits is less than 8 an exception is thrown
*/
@Test
- public void constructor_np_noName() {
-
+ public void constructor__probability_bits_hash__BadNumberOfBitsTest() {
try {
- new Shape(null, 5, 0.1);
- fail( "Should throw IllegalArgumentException");
- }
- catch (final IllegalArgumentException expected)
+ new Shape(testFunction, 0.5, 6, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
{
- // do nothing
+ //expected
}
}
/**
- * Tests that the constructor with a null name, number of items and size of filter fails.
+ * Tests that invalid probability values cause and IllegalArgumentException to
+ * be thrown.
*/
@Test
- public void constructor_nm_noName() {
-
+ public void constructor__probability_bits_hash_BadProbabilityTest() {
+ // probability should not be 0
try {
- new Shape(null, 5, 72);
- fail( "Should throw IllegalArgumentException");
- }
- catch (final IllegalArgumentException expected)
+ new Shape(testFunction, 0.0, 24, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
{
- // do nothing
+ //expected
}
- }
-
- /**
- * Tests that the constructor with a null name, number of items, size of filter,
- * and number of functions fails.
- */
- @Test
- public void constructor_nmk_noName() {
+ // probability should not be = -1
try {
- new Shape(null, 5, 72, 17);
- fail( "Should throw IllegalArgumentException");
- }
- catch (final IllegalArgumentException expected)
+ new Shape(testFunction, -1.0, 24, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
{
- // do nothing
+ //expected
}
- }
-
- /**
- * Tests that the constructor with a null name, probability, size of filter,
- * and number of functions fails.
- */
- @Test
- public void constructor_pmk_noName() {
+ // probability should not be < -1
try {
- new Shape(null, 0.1, 72, 17);
- fail( "Should throw IllegalArgumentException");
- }
- catch (final IllegalArgumentException expected)
+ new Shape(testFunction, -1.5, 24, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
{
- // do nothing
+ //expected
}
- }
- /**
- * Tests the the probability is calculated correctly.
- */
- @Test
- public void constructor_items_probability_Test() {
-
- assertEquals(24, shape.getNumberOfBits());
- assertEquals(3, shape.getNumberOfBytes());
- assertEquals(3, shape.getNumberOfHashFunctions());
- assertEquals(5, shape.getNumberOfItems());
- assertEquals(0.100375138, shape.getProbability(), 0.000001);
+ // probability should not be = 1
+ try {
+ new Shape(testFunction, 1.0, 24, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
+ {
+ //expected
+ }
+ // probability should not be > 1
+ try {
+ new Shape(testFunction, 2.0, 24, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
+ {
+ //expected
+ }
}
/**
- * Tests that if calculated number of bits is greater than Integer.MAX_VALUE an
- * IllegalArgumentException is thrown.
+ * Tests that if the number of bits less than 8 an IllegalArgumentException
+ * is thrown.
*/
@Test
- public void constructor_items_probability_NumberOfBitsOverflowTest() {
+ public void constructor_items_bits_BadNumberOfBitsTest() {
try {
- new Shape( testFunction, Integer.MAX_VALUE, 1.0 / 10);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
+ new Shape(testFunction, 5, 6);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
+ {
+ //expected
}
}
/**
- * Tests that if the number of items is less than 1 an IllegalArgumentException is
- * thrown.
+ * Tests that if the number of hash functions is less than 1 an exception is thrown.
*/
@Test
- public void constructor_items_probability_BadNumberOfItemsTest() {
+ public void constructor_items_bits_BadNumberOfHashFunctionsTest() {
try {
- new Shape( testFunction, 0, 1.0 / 10);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
+ new Shape(testFunction, 16,8);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
+ {
+ //expected
}
}
/**
- * Tests that if the probability is less than or equal to 0 an IllegalArgumentException
+ * Tests that if the number of items less than 1 an IllegalArgumentException
* is thrown.
*/
@Test
- public void constructor_items_probability_BadProbabilityTest() {
- try {
- new Shape(testFunction, 10, 0.0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
-
+ public void constructor_items_bits_BadNumberOfItemsTest() {
try {
- new Shape(testFunction, 10, 1.0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
+ new Shape(testFunction, 0, 24);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
+ {
+ //expected
}
}
/**
- * Tests that the number of items and number of bits is passed the other values are
- * calculated correctly.
+ * Tests that if the number of bits is less than 8 an exception is thrown
*/
@Test
- public void constructor_items_bitsTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&m=24
- */
- final Shape filterConfig = new Shape(testFunction, 5, 24);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(3, filterConfig.getNumberOfBytes());
- assertEquals(3, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
-
+ public void constructor_items_bits_hash_BadNumberOfBitsTest() {
+ try {
+ new Shape(testFunction, 5, 6, 1);
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected)
+ {
+ //expected
+ }
}
/**
- * Tests that if the number of items less than 1 an IllegalArgumentException
- * is thrown.
+ * Tests that if the number of hash functions is less than 1 an exception is
+ * thrown.
*/
@Test
- public void constructor_items_bits_BadNumberOfItemsTest() {
+ public void constructor_items_bits_hash_BadNumberOfHashFunctionsTest() {
try {
- new Shape(testFunction, 0, 24);
+ new Shape(testFunction, 5, 24, 0);
fail( "Should have thrown IllegalArgumentException");
} catch (final IllegalArgumentException expected)
{
@@ -240,13 +216,12 @@ public class ShapeTest {
}
/**
- * Tests that if the number of bits less than 8 an IllegalArgumentException
- * is thrown.
+ * Tests that if the number of items is less than 1 an exception is thrown.
*/
@Test
- public void constructor_items_bits_BadNumberOfBitsTest() {
+ public void constructor_items_bits_hash_BadNumberOfItemsTest() {
try {
- new Shape(testFunction, 5, 6);
+ new Shape(testFunction, 0, 24, 1);
fail( "Should have thrown IllegalArgumentException");
} catch (final IllegalArgumentException expected)
{
@@ -255,12 +230,13 @@ public class ShapeTest {
}
/**
- * Tests that if the number of hash functions is less than 1 an exception is thrown.
+ * Tests that if the calculated probability is greater than or equal to 1 an
+ * IllegalArgumentException is thrown
*/
@Test
- public void constructor_items_bits_BadNumberOfHashFunctionsTest() {
+ public void constructor_items_bits_hash_BadProbabilityTest() {
try {
- new Shape(testFunction, 16,8);
+ new Shape(testFunction, 4000,8,1);
fail( "Should have thrown IllegalArgumentException");
} catch (final IllegalArgumentException expected)
{
@@ -288,144 +264,150 @@ public class ShapeTest {
}
/**
- * Tests that if the number of items is less than 1 an exception is thrown.
+ * Tests that the number of items and number of bits is passed the other values are
+ * calculated correctly.
*/
@Test
- public void constructor_items_bits_hash_BadNumberOfItemsTest() {
- try {
- new Shape(testFunction, 0, 24, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
- {
- //expected
- }
+ public void constructor_items_bitsTest() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&m=24
+ */
+ final Shape filterConfig = new Shape(testFunction, 5, 24);
+
+ assertEquals(24, filterConfig.getNumberOfBits());
+ assertEquals(3, filterConfig.getNumberOfBytes());
+ assertEquals(3, filterConfig.getNumberOfHashFunctions());
+ assertEquals(5, filterConfig.getNumberOfItems());
+ assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
+
}
/**
- * Tests that if the number of bits is less than 8 an exception is thrown
+ * Tests that if the number of items is less than 1 an IllegalArgumentException is
+ * thrown.
*/
@Test
- public void constructor_items_bits_hash_BadNumberOfBitsTest() {
+ public void constructor_items_probability_BadNumberOfItemsTest() {
try {
- new Shape(testFunction, 5, 6, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
- {
- //expected
+ new Shape( testFunction, 0, 1.0 / 10);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
}
}
/**
- * Tests that if the number of hash functions is less than 1 an exception is
- * thrown.
+ * Tests that if the probability is less than or equal to 0 an IllegalArgumentException
+ * is thrown.
*/
@Test
- public void constructor_items_bits_hash_BadNumberOfHashFunctionsTest() {
+ public void constructor_items_probability_BadProbabilityTest() {
try {
- new Shape(testFunction, 5, 24, 0);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
- {
- //expected
+ new Shape(testFunction, 10, 0.0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
+ }
+
+ try {
+ new Shape(testFunction, 10, 1.0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
}
}
/**
- * Tests that if the calculated probability is greater than or equal to 1 an
- * IllegalArgumentException is thrown
+ * Tests that if calculated number of bits is greater than Integer.MAX_VALUE an
+ * IllegalArgumentException is thrown.
*/
@Test
- public void constructor_items_bits_hash_BadProbabilityTest() {
+ public void constructor_items_probability_NumberOfBitsOverflowTest() {
try {
- new Shape(testFunction, 4000,8,1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
- {
- //expected
+ new Shape( testFunction, Integer.MAX_VALUE, 1.0 / 10);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
}
}
/**
- * Tests the calculated values of calling the constructor with the
- * probability, number of bits and number of hash functions.
+ * Tests the the probability is calculated correctly.
*/
@Test
- public void constructor_probability_bits_hashTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
- */
- final Shape filterConfig = new Shape(testFunction, 0.1, 24, 3);
+ public void constructor_items_probability_Test() {
+
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(3, shape.getNumberOfBytes());
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(5, shape.getNumberOfItems());
+ assertEquals(0.100375138, shape.getProbability(), 0.000001);
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(3, filterConfig.getNumberOfBytes());
- assertEquals(3, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
}
/**
- * Tests that invalid probability values cause and IllegalArgumentException to
- * be thrown.
+ * Tests that the constructor with a null name, number of items and size of filter fails.
*/
@Test
- public void constructor__probability_bits_hash_BadProbabilityTest() {
- // probability should not be 0
- try {
- new Shape(testFunction, 0.0, 24, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
- {
- //expected
- }
+ public void constructor_nm_noName() {
- // probability should not be = -1
try {
- new Shape(testFunction, -1.0, 24, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
- {
- //expected
+ new Shape(null, 5, 72);
+ fail( "Should throw IllegalArgumentException");
}
-
- // probability should not be < -1
- try {
- new Shape(testFunction, -1.5, 24, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
+ catch (final IllegalArgumentException expected)
{
- //expected
+ // do nothing
}
+ }
+
+ /**
+ * Tests that the constructor with a null name, number of items, size of filter,
+ * and number of functions fails.
+ */
+ @Test
+ public void constructor_nmk_noName() {
- // probability should not be = 1
try {
- new Shape(testFunction, 1.0, 24, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
+ new Shape(null, 5, 72, 17);
+ fail( "Should throw IllegalArgumentException");
+ }
+ catch (final IllegalArgumentException expected)
{
- //expected
+ // do nothing
}
+ }
+
+ /**
+ * Tests that the constructor with a null name, number of items, and probability fails.
+ */
+ @Test
+ public void constructor_np_noName() {
- // probability should not be > 1
try {
- new Shape(testFunction, 2.0, 24, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
+ new Shape(null, 5, 0.1);
+ fail( "Should throw IllegalArgumentException");
+ }
+ catch (final IllegalArgumentException expected)
{
- //expected
+ // do nothing
}
}
/**
- * Tests that if the number of bits is less than 8 an exception is thrown
+ * Tests that the constructor with a null name, probability, size of filter,
+ * and number of functions fails.
*/
@Test
- public void constructor__probability_bits_hash__BadNumberOfBitsTest() {
+ public void constructor_pmk_noName() {
+
try {
- new Shape(testFunction, 0.5, 6, 1);
- fail( "Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected)
+ new Shape(null, 0.1, 72, 17);
+ fail( "Should throw IllegalArgumentException");
+ }
+ catch (final IllegalArgumentException expected)
{
- //expected
+ // do nothing
}
}
@@ -444,6 +426,24 @@ public class ShapeTest {
}
/**
+ * Tests the calculated values of calling the constructor with the
+ * probability, number of bits and number of hash functions.
+ */
+ @Test
+ public void constructor_probability_bits_hashTest() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
+ */
+ final Shape filterConfig = new Shape(testFunction, 0.1, 24, 3);
+
+ assertEquals(24, filterConfig.getNumberOfBits());
+ assertEquals(3, filterConfig.getNumberOfBytes());
+ assertEquals(3, filterConfig.getNumberOfHashFunctions());
+ assertEquals(5, filterConfig.getNumberOfItems());
+ assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
+ }
+
+ /**
* Test equality of shape.
*/
@Test
@@ -461,23 +461,23 @@ public class ShapeTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}};
assertNotEquals(new Shape(testFunction2, 4, 1.0 / 10), shape);
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
index 2eb157c..cb3924d 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
@@ -43,23 +43,23 @@ public class StaticHasherTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
@@ -71,58 +71,71 @@ public class StaticHasherTest {
}
@Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
+ public ProcessType getProcessType() {
+ return ProcessType.CYCLIC;
}
@Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
+ public String getProvider() {
+ return "Apache Commons Collection Tests";
}
@Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
+ public long getSignature() {
+ return 0;
}
@Override
- public long getSignature() {
- return 0;
+ public Signedness getSignedness() {
+ return Signedness.SIGNED;
}
};
private final Shape shape = new Shape(testFunction, 3, 72, 17);
/**
- * Tests that getBits returns the proper values.
+ * Compare 2 static hashers to verify they have the same bits enabled.
+ *
+ * @param hasher1 the first static hasher.
+ * @param hasher2 the second static hasher.
*/
- @Test
- public void testGetBits() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
+ final OfInt iter1 = hasher1.getBits(shape);
+ final OfInt iter2 = hasher2.getBits(shape);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- assertEquals(17, hasher.size());
- final OfInt iter = hasher.getBits(shape);
- for (int i = 0; i < 17; i++) {
- assertTrue(iter.hasNext());
- assertEquals(i, iter.nextInt());
+ while (iter1.hasNext()) {
+ assertTrue("Not enough data in second hasher", iter2.hasNext());
+ assertEquals(iter1.nextInt(), iter2.nextInt());
}
- assertFalse(iter.hasNext());
-
+ assertFalse("Too much data in second hasher", iter2.hasNext());
}
/**
- * Tests that gitBits does not return duplicates and orders the indices.
+ * Tests that passing a hasher other than a StaticHahser to the constructor works as
+ * expected.
*/
@Test
- public void testGetBits_DuplicateValues() {
- final int[] input = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69, 59,
- 49, 39, 13, 3, 65, 55, 45, 35, 25};
- final int[] expected = {1, 2, 3, 6, 7, 10, 11, 13, 15, 17, 19, 23, 24, 25, 35, 36, 39, 43, 44, 45, 48, 49, 53, 55, 57,
- 59, 61, 62, 63, 65, 69, 70};
+ public void testConstructor_Hasher() {
+ final int[] expected = {1, 3, 5, 7, 9};
- final StaticHasher hasher = new StaticHasher(Arrays.stream(input).iterator(), shape);
+ final Hasher testHasher = new Hasher() {
+
+ @Override
+ public OfInt getBits(final Shape shape) {
+ final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
+ return Arrays.stream(values).iterator();
+ }
+
+ @Override
+ public HashFunctionIdentity getHashFunctionIdentity() {
+ return testFunction;
+ }
+ @Override
+ public boolean isEmpty() { return false; }
+ };
+
+ final StaticHasher hasher = new StaticHasher(testHasher, shape);
final OfInt iter = hasher.getBits(shape);
for (final int element : expected) {
assertTrue(iter.hasNext());
@@ -132,20 +145,34 @@ public class StaticHasherTest {
}
/**
- * Tests that gitBits is called with the wrong shape an exeption is thrown.
+ * Tests that passing a hasher other than a StaticHahser and the wrong Shape to the
+ * constructor throws an IllegalArgumentException.
*/
@Test
- public void testGetBits_WrongShape() {
- final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
+ public void testConstructor_Hasher_WrongShape() {
+ final Hasher testHasher = new Hasher() {
+
+ @Override
+ public OfInt getBits(final Shape shape) {
+ final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
+ return Arrays.stream(values).iterator();
+ }
+
+ @Override
+ public HashFunctionIdentity getHashFunctionIdentity() {
+ return testFunctionX;
+ }
+
+ @Override
+ public boolean isEmpty() { return false; }
+ };
try {
- hasher.getBits(new Shape(testFunctionX, 3, 72, 17));
+ new StaticHasher(testHasher, shape);
fail("Should have thown IllegalArgumentException");
} catch (final IllegalArgumentException expected) {
// do nothing
}
-
}
/**
@@ -206,23 +233,6 @@ public class StaticHasherTest {
}
/**
- * Compare 2 static hashers to verify they have the same bits enabled.
- *
- * @param hasher1 the first static hasher.
- * @param hasher2 the second static hasher.
- */
- private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
- final OfInt iter1 = hasher1.getBits(shape);
- final OfInt iter2 = hasher2.getBits(shape);
-
- while (iter1.hasNext()) {
- assertTrue("Not enough data in second hasher", iter2.hasNext());
- assertEquals(iter1.nextInt(), iter2.nextInt());
- }
- assertFalse("Too much data in second hasher", iter2.hasNext());
- }
-
- /**
* Tests that the constructor that accepts a static hasher properly builds the hasher.
*/
@Test
@@ -256,31 +266,35 @@ public class StaticHasherTest {
}
/**
- * Tests that passing a hasher other than a StaticHahser to the constructor works as
- * expected.
+ * Tests that getBits returns the proper values.
*/
@Test
- public void testConstructor_Hasher() {
- final int[] expected = {1, 3, 5, 7, 9};
+ public void testGetBits() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher testHasher = new Hasher() {
+ final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
+ assertEquals(17, hasher.size());
+ final OfInt iter = hasher.getBits(shape);
+ for (int i = 0; i < 17; i++) {
+ assertTrue(iter.hasNext());
+ assertEquals(i, iter.nextInt());
+ }
+ assertFalse(iter.hasNext());
- @Override
- public boolean isEmpty() { return false; }
+ }
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return testFunction;
- }
+ /**
+ * Tests that gitBits does not return duplicates and orders the indices.
+ */
+ @Test
+ public void testGetBits_DuplicateValues() {
+ final int[] input = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69, 59,
+ 49, 39, 13, 3, 65, 55, 45, 35, 25};
+ final int[] expected = {1, 2, 3, 6, 7, 10, 11, 13, 15, 17, 19, 23, 24, 25, 35, 36, 39, 43, 44, 45, 48, 49, 53, 55, 57,
+ 59, 61, 62, 63, 65, 69, 70};
- @Override
- public OfInt getBits(final Shape shape) {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- return Arrays.stream(values).iterator();
- }
- };
+ final StaticHasher hasher = new StaticHasher(Arrays.stream(input).iterator(), shape);
- final StaticHasher hasher = new StaticHasher(testHasher, shape);
final OfInt iter = hasher.getBits(shape);
for (final int element : expected) {
assertTrue(iter.hasNext());
@@ -290,34 +304,20 @@ public class StaticHasherTest {
}
/**
- * Tests that passing a hasher other than a StaticHahser and the wrong Shape to the
- * constructor throws an IllegalArgumentException.
+ * Tests that gitBits is called with the wrong shape an exeption is thrown.
*/
@Test
- public void testConstructor_Hasher_WrongShape() {
- final Hasher testHasher = new Hasher() {
-
- @Override
- public boolean isEmpty() { return false; }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return testFunctionX;
- }
-
- @Override
- public OfInt getBits(final Shape shape) {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- return Arrays.stream(values).iterator();
- }
- };
+ public void testGetBits_WrongShape() {
+ final List<Integer> lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
try {
- new StaticHasher(testHasher, shape);
+ hasher.getBits(new Shape(testFunctionX, 3, 72, 17));
fail("Should have thown IllegalArgumentException");
} catch (final IllegalArgumentException expected) {
// do nothing
}
+
}
/**