You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by xe...@apache.org on 2012/03/17 00:22:18 UTC
git commit: update MurmurHash to version 3 patch by Vijay;
reviewed by Pavel Yaskevich for CASSANDRA-2975
Updated Branches:
refs/heads/trunk 5ed172053 -> d765b246e
update MurmurHash to version 3
patch by Vijay; reviewed by Pavel Yaskevich for CASSANDRA-2975
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d765b246
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d765b246
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d765b246
Branch: refs/heads/trunk
Commit: d765b246e72e7bb455e9ec9440ae735b1ee8543f
Parents: 5ed1720
Author: Pavel Yaskevich <xe...@apache.org>
Authored: Sat Mar 17 01:52:06 2012 +0300
Committer: Pavel Yaskevich <xe...@apache.org>
Committed: Sat Mar 17 01:56:58 2012 +0300
----------------------------------------------------------------------
CHANGES.txt | 1 +
src/java/org/apache/cassandra/db/ColumnIndex.java | 15 +-
.../org/apache/cassandra/db/RowIndexEntry.java | 22 +--
.../db/columniterator/SSTableNamesIterator.java | 2 +-
.../apache/cassandra/io/sstable/Descriptor.java | 12 +-
.../apache/cassandra/io/sstable/IndexHelper.java | 12 +-
.../io/sstable/SSTableIdentityIterator.java | 2 +-
.../apache/cassandra/io/sstable/SSTableReader.java | 16 +--
.../apache/cassandra/io/sstable/SSTableWriter.java | 9 +-
.../org/apache/cassandra/utils/BloomFilter.java | 75 ++------
.../cassandra/utils/BloomFilterSerializer.java | 6 +-
src/java/org/apache/cassandra/utils/Filter.java | 2 +
.../org/apache/cassandra/utils/FilterFactory.java | 147 +++++++++++++++
.../apache/cassandra/utils/LegacyBloomFilter.java | 7 +-
.../utils/LegacyBloomFilterSerializer.java | 11 +-
.../apache/cassandra/utils/Murmur2BloomFilter.java | 53 +++++
.../apache/cassandra/utils/Murmur3BloomFilter.java | 51 +++++
.../org/apache/cassandra/utils/MurmurHash.java | 107 +++++++++++-
.../cassandra/utils/LongBloomFilterTest.java | 75 ++++++--
.../cassandra/io/sstable/DescriptorTest.java | 17 ++-
.../apache/cassandra/utils/BloomFilterTest.java | 15 +-
.../apache/cassandra/utils/FilterTestHelper.java | 3 +-
.../cassandra/utils/LegacyBloomFilterTest.java | 4 +-
.../apache/cassandra/utils/SerializationsTest.java | 30 ++-
24 files changed, 529 insertions(+), 165 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 646afd5..8eef80c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,6 +1,7 @@
1.2-dev
* Track tombstone expiration and compact when tombstone content is
higher than a configurable threshold, default 20% (CASSANDRA-3442)
+ * update MurmurHash to version 3 (CASSANDRA-2975)
1.1.1-dev
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/db/ColumnIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java b/src/java/org/apache/cassandra/db/ColumnIndex.java
index ff3af19..b4c2f90 100644
--- a/src/java/org/apache/cassandra/db/ColumnIndex.java
+++ b/src/java/org/apache/cassandra/db/ColumnIndex.java
@@ -17,9 +17,6 @@
*/
package org.apache.cassandra.db;
-import java.io.DataOutput;
-import java.io.IOError;
-import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
@@ -28,23 +25,23 @@ import java.util.List;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.io.sstable.IndexHelper;
-import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.io.util.IIterableColumns;
-import org.apache.cassandra.utils.BloomFilter;
+import org.apache.cassandra.utils.Filter;
+import org.apache.cassandra.utils.FilterFactory;
public class ColumnIndex
{
public final List<IndexHelper.IndexInfo> columnsIndex;
- public final BloomFilter bloomFilter;
+ public final Filter bloomFilter;
- private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), BloomFilter.emptyFilter());
+ private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), FilterFactory.emptyFilter());
private ColumnIndex(int estimatedColumnCount)
{
- this(new ArrayList<IndexHelper.IndexInfo>(), BloomFilter.getFilter(estimatedColumnCount, 4));
+ this(new ArrayList<IndexHelper.IndexInfo>(), FilterFactory.getFilter(estimatedColumnCount, 4));
}
- private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex, BloomFilter bloomFilter)
+ private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex, Filter bloomFilter)
{
this.columnsIndex = columnsIndex;
this.bloomFilter = bloomFilter;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/db/RowIndexEntry.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/RowIndexEntry.java b/src/java/org/apache/cassandra/db/RowIndexEntry.java
index 46c6604..0b910c8 100644
--- a/src/java/org/apache/cassandra/db/RowIndexEntry.java
+++ b/src/java/org/apache/cassandra/db/RowIndexEntry.java
@@ -24,14 +24,11 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import org.apache.cassandra.io.ISerializer;
import org.apache.cassandra.io.sstable.Descriptor;
import org.apache.cassandra.io.sstable.IndexHelper;
-import org.apache.cassandra.io.util.FileDataInput;
import org.apache.cassandra.io.util.FileUtils;
-import org.apache.cassandra.service.StorageService;
-import org.apache.cassandra.utils.BloomFilter;
-import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.Filter;
+import org.apache.cassandra.utils.FilterFactory;
public class RowIndexEntry
{
@@ -67,7 +64,7 @@ public class RowIndexEntry
return Collections.<IndexHelper.IndexInfo>emptyList();
}
- public BloomFilter bloomFilter()
+ public Filter bloomFilter()
{
throw new UnsupportedOperationException();
}
@@ -85,7 +82,7 @@ public class RowIndexEntry
dos.writeInt(rie.columnsIndex().size());
for (IndexHelper.IndexInfo info : rie.columnsIndex())
info.serialize(dos);
- BloomFilter.serializer().serialize(rie.bloomFilter(), dos);
+ FilterFactory.serialize(rie.bloomFilter(), dos);
}
else
{
@@ -107,7 +104,7 @@ public class RowIndexEntry
List<IndexHelper.IndexInfo> columnsIndex = new ArrayList<IndexHelper.IndexInfo>(entries);
for (int i = 0; i < entries; i++)
columnsIndex.add(IndexHelper.IndexInfo.deserialize(dis));
- BloomFilter bf = BloomFilter.serializer().deserialize(dis);
+ Filter bf = FilterFactory.deserialize(dis, descriptor.filterType);
return new IndexedEntry(position, new DeletionInfo(mfda, ldt), columnsIndex, bf);
}
else
@@ -142,9 +139,9 @@ public class RowIndexEntry
{
private final DeletionInfo deletionInfo;
private final List<IndexHelper.IndexInfo> columnsIndex;
- private final BloomFilter bloomFilter;
+ private final Filter bloomFilter;
- private IndexedEntry(long position, DeletionInfo deletionInfo, List<IndexHelper.IndexInfo> columnsIndex, BloomFilter bloomFilter)
+ private IndexedEntry(long position, DeletionInfo deletionInfo, List<IndexHelper.IndexInfo> columnsIndex, Filter bloomFilter)
{
super(position);
assert deletionInfo != null;
@@ -167,7 +164,7 @@ public class RowIndexEntry
}
@Override
- public BloomFilter bloomFilter()
+ public Filter bloomFilter()
{
return bloomFilter;
}
@@ -178,7 +175,8 @@ public class RowIndexEntry
size += DBConstants.INT_SIZE; // number of entries
for (IndexHelper.IndexInfo info : columnsIndex)
size += info.serializedSize();
- return size + (int)bloomFilter.serializedSize();
+
+ return size + (int) FilterFactory.serializedSize(bloomFilter);
}
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index 4d3a148..068933c 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -134,7 +134,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
else
{
assert file != null;
- bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.usesOldBloomFilter);
+ bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.filterType);
indexList = IndexHelper.deserializeIndex(file);
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/Descriptor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/Descriptor.java b/src/java/org/apache/cassandra/io/sstable/Descriptor.java
index 4c9ab2d..dff0e98 100644
--- a/src/java/org/apache/cassandra/io/sstable/Descriptor.java
+++ b/src/java/org/apache/cassandra/io/sstable/Descriptor.java
@@ -22,6 +22,7 @@ import java.util.StringTokenizer;
import com.google.common.base.Objects;
+import org.apache.cassandra.utils.FilterFactory;
import org.apache.cassandra.utils.Pair;
import static org.apache.cassandra.io.sstable.Component.separator;
@@ -55,7 +56,7 @@ public class Descriptor
// hc (1.0.4): records partitioner in metadata component
// ia (1.2.0): column indexes are promoted to the index file
// records estimated histogram of deletion times in tombstones
- public static final String CURRENT_VERSION = "ia";
+ public static final String CURRENT_VERSION = "ib";
public final File directory;
/** version has the following format: <code>[a-z]+</code> */
@@ -70,13 +71,13 @@ public class Descriptor
public final boolean hasIntRowSize;
public final boolean hasEncodedKeys;
public final boolean isLatestVersion;
- public final boolean usesOldBloomFilter;
public final boolean metadataIncludesReplayPosition;
public final boolean tracksMaxTimestamp;
public final boolean hasCompressionRatio;
public final boolean hasPartitioner;
public final boolean tracksTombstones;
public final boolean hasPromotedIndexes;
+ public final FilterFactory.Type filterType;
/**
* A descriptor that assumes CURRENT_VERSION.
@@ -100,7 +101,6 @@ public class Descriptor
hasStringsInBloomFilter = version.compareTo("c") < 0;
hasIntRowSize = version.compareTo("d") < 0;
hasEncodedKeys = version.compareTo("e") < 0;
- usesOldBloomFilter = version.compareTo("f") < 0;
metadataIncludesReplayPosition = version.compareTo("g") >= 0;
tracksMaxTimestamp = version.compareTo("h") >= 0;
hasCompressionRatio = version.compareTo("hb") >= 0;
@@ -108,6 +108,12 @@ public class Descriptor
tracksTombstones = version.compareTo("ia") >= 0;
hasPromotedIndexes = version.compareTo("ia") >= 0;
isLatestVersion = version.compareTo(CURRENT_VERSION) == 0;
+ if (version.compareTo("f") < 0)
+ filterType = FilterFactory.Type.SHA;
+ else if (version.compareTo("ia") <= 0)
+ filterType = FilterFactory.Type.MURMUR2;
+ else
+ filterType = FilterFactory.Type.MURMUR3;
}
public String filenameFor(Component component)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/IndexHelper.java b/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
index 798dc19..5c7d645 100644
--- a/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
+++ b/src/java/org/apache/cassandra/io/sstable/IndexHelper.java
@@ -104,9 +104,9 @@ public class IndexHelper
return indexList;
}
- public static Filter defreezeBloomFilter(FileDataInput file, boolean usesOldBloomFilter) throws IOException
+ public static Filter defreezeBloomFilter(FileDataInput file, FilterFactory.Type type) throws IOException
{
- return defreezeBloomFilter(file, Integer.MAX_VALUE, usesOldBloomFilter);
+ return defreezeBloomFilter(file, Integer.MAX_VALUE, type);
}
/**
@@ -114,14 +114,14 @@ public class IndexHelper
*
* @param file - source file
* @param maxSize - sanity check: if filter claimes to be larger than this it is bogus
- * @param useOldBuffer - do we need to reuse old buffer?
+ * @param type - Bloom Filter type.
*
* @return bloom filter summarizing the column information
* @throws java.io.IOException if an I/O error occurs.
* Guarantees that file's current position will be just after the bloom filter, even if
* the filter cannot be deserialized, UNLESS EOFException is thrown.
*/
- public static Filter defreezeBloomFilter(FileDataInput file, long maxSize, boolean useOldBuffer) throws IOException
+ public static Filter defreezeBloomFilter(FileDataInput file, long maxSize, FilterFactory.Type type) throws IOException
{
int size = file.readInt();
if (size > maxSize || size <= 0)
@@ -129,9 +129,7 @@ public class IndexHelper
ByteBuffer bytes = file.readBytes(size);
DataInputStream stream = new DataInputStream(ByteBufferUtil.inputStream(bytes));
- return useOldBuffer
- ? LegacyBloomFilter.serializer().deserialize(stream)
- : BloomFilter.serializer().deserialize(stream);
+ return FilterFactory.deserialize(stream, type);
}
/**
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
index 63b1322..ca04ce9 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
@@ -116,7 +116,7 @@ public class SSTableIdentityIterator implements Comparable<SSTableIdentityIterat
{
try
{
- IndexHelper.defreezeBloomFilter(file, dataSize, sstable.descriptor.usesOldBloomFilter);
+ IndexHelper.defreezeBloomFilter(file, dataSize, sstable.descriptor.filterType);
}
catch (Exception e)
{
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
index a41d2a7..864e11f 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
@@ -309,7 +309,7 @@ public class SSTableReader extends SSTable
{
if (!components.contains(Component.FILTER))
{
- bf = BloomFilter.emptyFilter();
+ bf = FilterFactory.emptyFilter();
return;
}
@@ -317,14 +317,7 @@ public class SSTableReader extends SSTable
try
{
stream = new DataInputStream(new BufferedInputStream(new FileInputStream(descriptor.filenameFor(Component.FILTER))));
- if (descriptor.usesOldBloomFilter)
- {
- bf = LegacyBloomFilter.serializer().deserialize(stream);
- }
- else
- {
- bf = BloomFilter.serializer().deserialize(stream);
- }
+ bf = FilterFactory.deserialize(stream, descriptor.filterType);
}
finally
{
@@ -455,10 +448,7 @@ public class SSTableReader extends SSTable
public long getBloomFilterSerializedSize()
{
- if (descriptor.usesOldBloomFilter)
- return LegacyBloomFilter.serializer().serializedSize((LegacyBloomFilter) bf);
- else
- return BloomFilter.serializer().serializedSize((BloomFilter) bf);
+ return FilterFactory.serializedSize(bf, descriptor.filterType);
}
/**
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
index 9144044..55182d9 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
@@ -368,7 +368,7 @@ public class SSTableWriter extends SSTable
private final SequentialWriter indexFile;
public final SegmentedFile.Builder builder;
public final IndexSummary summary;
- public final BloomFilter bf;
+ public final Filter bf;
private FileMark mark;
IndexWriter(long keyCount) throws IOException
@@ -384,9 +384,8 @@ public class SSTableWriter extends SSTable
logger.error("Bloom filter FP chance of zero isn't supposed to happen");
fpChance = null;
}
- bf = fpChance == null
- ? BloomFilter.getFilter(keyCount, 15)
- : BloomFilter.getFilter(keyCount, fpChance);
+ bf = fpChance == null ? FilterFactory.getFilter(keyCount, 15)
+ : FilterFactory.getFilter(keyCount, fpChance);
}
public void append(DecoratedKey key, RowIndexEntry indexEntry) throws IOException
@@ -410,7 +409,7 @@ public class SSTableWriter extends SSTable
// bloom filter
FileOutputStream fos = new FileOutputStream(descriptor.filenameFor(SSTable.COMPONENT_FILTER));
DataOutputStream stream = new DataOutputStream(fos);
- BloomFilter.serializer().serialize(bf, stream);
+ FilterFactory.serialize(bf, stream, descriptor.filterType);
stream.flush();
fos.getFD().sync();
stream.close();
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BloomFilter.java b/src/java/org/apache/cassandra/utils/BloomFilter.java
index f664264..3ca62e5 100644
--- a/src/java/org/apache/cassandra/utils/BloomFilter.java
+++ b/src/java/org/apache/cassandra/utils/BloomFilter.java
@@ -19,91 +19,45 @@ package org.apache.cassandra.utils;
import java.nio.ByteBuffer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import org.apache.cassandra.utils.obs.OpenBitSet;
-public class BloomFilter extends Filter
+public abstract class BloomFilter extends Filter
{
- private static final Logger logger = LoggerFactory.getLogger(BloomFilter.class);
private static final int EXCESS = 20;
- static final BloomFilterSerializer serializer = new BloomFilterSerializer();
public final OpenBitSet bitset;
- BloomFilter(int hashes, OpenBitSet bs)
+ BloomFilter(int hashes, long numElements, int bucketsPer)
{
hashCount = hashes;
- bitset = bs;
- }
-
- public static BloomFilter emptyFilter()
- {
- return new BloomFilter(0, bucketsFor(0, 0));
- }
-
- public static BloomFilterSerializer serializer()
- {
- return serializer;
+ bitset = new OpenBitSet(numElements * bucketsPer + EXCESS);
}
- private static OpenBitSet bucketsFor(long numElements, int bucketsPer)
+ BloomFilter(int hashes, OpenBitSet bitset)
{
- return new OpenBitSet(numElements * bucketsPer + EXCESS);
- }
-
- /**
- * @return A BloomFilter with the lowest practical false positive probability
- * for the given number of elements.
- */
- public static BloomFilter getFilter(long numElements, int targetBucketsPerElem)
- {
- int maxBucketsPerElement = Math.max(1, BloomCalculations.maxBucketsPerElement(numElements));
- int bucketsPerElement = Math.min(targetBucketsPerElem, maxBucketsPerElement);
- if (bucketsPerElement < targetBucketsPerElem)
- {
- logger.warn(String.format("Cannot provide an optimal BloomFilter for %d elements (%d/%d buckets per element).",
- numElements, bucketsPerElement, targetBucketsPerElem));
- }
- BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement);
- if (logger.isTraceEnabled())
- logger.trace("Creating bloom filter for {} elements and spec {}", numElements, spec);
- return new BloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
- }
-
- /**
- * @return The smallest BloomFilter that can provide the given false positive
- * probability rate for the given number of elements.
- *
- * Asserts that the given probability can be satisfied using this filter.
- */
- public static BloomFilter getFilter(long numElements, double maxFalsePosProbability)
- {
- assert maxFalsePosProbability <= 1.0 : "Invalid probability";
- int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
- BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
- return new BloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement));
+ this.hashCount = hashes;
+ this.bitset = bitset;
}
private long[] getHashBuckets(ByteBuffer key)
{
- return BloomFilter.getHashBuckets(key, hashCount, bitset.size());
+ return getHashBuckets(key, hashCount, bitset.size());
}
+ protected abstract long[] hash(ByteBuffer b, int position, int remaining, long seed);
+
// Murmur is faster than an SHA-based approach and provides as-good collision
// resistance. The combinatorial generation approach described in
// http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
// does prove to work in actual tests, and is obviously faster
// than performing further iterations of murmur.
- static long[] getHashBuckets(ByteBuffer b, int hashCount, long max)
+ long[] getHashBuckets(ByteBuffer b, int hashCount, long max)
{
long[] result = new long[hashCount];
- long hash1 = MurmurHash.hash64(b, b.position(), b.remaining(), 0L);
- long hash2 = MurmurHash.hash64(b, b.position(), b.remaining(), hash1);
+ long[] hash = this.hash(b, b.position(), b.remaining(), 0L);
for (int i = 0; i < hashCount; ++i)
{
- result[i] = Math.abs((hash1 + (long)i * hash2) % max);
+ result[i] = Math.abs((hash[0] + (long)i * hash[1]) % max);
}
return result;
}
@@ -132,9 +86,4 @@ public class BloomFilter extends Filter
{
bitset.clear(0, bitset.size());
}
-
- public long serializedSize()
- {
- return serializer.serializedSize(this);
- }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java b/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
index c0834a6..4d3c4af 100644
--- a/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
+++ b/src/java/org/apache/cassandra/utils/BloomFilterSerializer.java
@@ -25,7 +25,7 @@ import org.apache.cassandra.db.DBConstants;
import org.apache.cassandra.io.ISerializer;
import org.apache.cassandra.utils.obs.OpenBitSet;
-public class BloomFilterSerializer implements ISerializer<BloomFilter>
+abstract class BloomFilterSerializer implements ISerializer<BloomFilter>
{
public void serialize(BloomFilter bf, DataOutput dos) throws IOException
{
@@ -59,9 +59,11 @@ public class BloomFilterSerializer implements ISerializer<BloomFilter>
bits[i] = dis.readLong();
}
- return new BloomFilter(hashes, bs);
+ return createFilter(hashes, bs);
}
+ protected abstract BloomFilter createFilter(int hashes, OpenBitSet bs);
+
/**
* Calculates a serialized size of the given Bloom Filter
* @see BloomFilterSerializer#serialize(BloomFilter, DataOutput)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/Filter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/Filter.java b/src/java/org/apache/cassandra/utils/Filter.java
index 138cb31..f7ce1f3 100644
--- a/src/java/org/apache/cassandra/utils/Filter.java
+++ b/src/java/org/apache/cassandra/utils/Filter.java
@@ -31,4 +31,6 @@ public abstract class Filter
public abstract void add(ByteBuffer key);
public abstract boolean isPresent(ByteBuffer key);
+
+ public abstract void clear();
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/FilterFactory.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/FilterFactory.java b/src/java/org/apache/cassandra/utils/FilterFactory.java
new file mode 100644
index 0000000..93c642f
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/FilterFactory.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FilterFactory
+{
+ private static final Logger logger = LoggerFactory.getLogger(FilterFactory.class);
+
+ public enum Type
+ {
+ SHA, MURMUR2, MURMUR3
+ }
+
+ public static void serialize(Filter bf, DataOutput output) throws IOException
+ {
+ serialize(bf, output, Type.MURMUR3);
+ }
+
+ public static void serialize(Filter bf, DataOutput output, Type type) throws IOException
+ {
+ switch (type)
+ {
+ case SHA:
+ LegacyBloomFilter.serializer.serialize((LegacyBloomFilter) bf, output);
+ break;
+ case MURMUR2:
+ Murmur2BloomFilter.serializer.serialize((Murmur2BloomFilter) bf, output);
+ break;
+ default:
+ Murmur3BloomFilter.serializer.serialize((Murmur3BloomFilter) bf, output);
+ break;
+ }
+ }
+
+ public static Filter deserialize(DataInput input, Type type) throws IOException
+ {
+ switch (type)
+ {
+ case SHA:
+ return LegacyBloomFilter.serializer.deserialize(input);
+ case MURMUR2:
+ return Murmur2BloomFilter.serializer.deserialize(input);
+ default:
+ return Murmur3BloomFilter.serializer.deserialize(input);
+ }
+ }
+
+ public static long serializedSize(Filter bf)
+ {
+ return serializedSize(bf, Type.MURMUR3);
+ }
+
+ public static long serializedSize(Filter bf, Type type)
+ {
+ switch (type)
+ {
+ case SHA:
+ return LegacyBloomFilter.serializer.serializedSize((LegacyBloomFilter) bf);
+ case MURMUR2:
+ return Murmur2BloomFilter.serializer.serializedSize((Murmur2BloomFilter) bf);
+ default:
+ return Murmur3BloomFilter.serializer.serializedSize((Murmur3BloomFilter) bf);
+ }
+ }
+
+ /**
+ * @return A BloomFilter with the lowest practical false positive
+ * probability for the given number of elements.
+ */
+ public static Filter getFilter(long numElements, int targetBucketsPerElem)
+ {
+ return getFilter(numElements, targetBucketsPerElem, Type.MURMUR3);
+ }
+
+ // helper method for test.
+ static Filter getFilter(long numElements, int targetBucketsPerElem, Type type)
+ {
+ int maxBucketsPerElement = Math.max(1, BloomCalculations.maxBucketsPerElement(numElements));
+ int bucketsPerElement = Math.min(targetBucketsPerElem, maxBucketsPerElement);
+ if (bucketsPerElement < targetBucketsPerElem)
+ {
+ logger.warn(String.format("Cannot provide an optimal BloomFilter for %d elements (%d/%d buckets per element).", numElements, bucketsPerElement, targetBucketsPerElem));
+ }
+ BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement);
+ return createFilter(spec.K, numElements, spec.bucketsPerElement, type);
+ }
+
+ /**
+ * @return The smallest BloomFilter that can provide the given false
+ * positive probability rate for the given number of elements.
+ *
+ * Asserts that the given probability can be satisfied using this
+ * filter.
+ */
+ public static Filter getFilter(long numElements, double maxFalsePosProbability)
+ {
+ return getFilter(numElements, maxFalsePosProbability, Type.MURMUR3);
+ }
+
+ // helper method for test.
+ static Filter getFilter(long numElements, double maxFalsePosProbability, Type type)
+ {
+ assert maxFalsePosProbability <= 1.0 : "Invalid probability";
+ int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
+ BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability);
+ return createFilter(spec.K, numElements, spec.bucketsPerElement, type);
+ }
+
+ private static Filter createFilter(int hash, long numElements, int bucketsPer, Type type)
+ {
+ switch (type)
+ {
+ case MURMUR2:
+ return new Murmur2BloomFilter(hash, numElements, bucketsPer);
+ default:
+ return new Murmur3BloomFilter(hash, numElements, bucketsPer);
+ }
+ }
+
+ public static BloomFilter emptyFilter()
+ {
+ return new Murmur3BloomFilter(0, 0, 0);
+ }
+}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java b/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
index 9942fbc..6f7269e 100644
--- a/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
+++ b/src/java/org/apache/cassandra/utils/LegacyBloomFilter.java
@@ -27,12 +27,7 @@ public class LegacyBloomFilter extends Filter
{
private static final int EXCESS = 20;
private static final Logger logger = LoggerFactory.getLogger(LegacyBloomFilter.class);
- static final LegacyBloomFilterSerializer serializer = new LegacyBloomFilterSerializer();
-
- public static LegacyBloomFilterSerializer serializer()
- {
- return serializer;
- }
+ public static final LegacyBloomFilterSerializer serializer = new LegacyBloomFilterSerializer();
private BitSet filter;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java b/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
index a556876..a0de19b 100644
--- a/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
+++ b/src/java/org/apache/cassandra/utils/LegacyBloomFilterSerializer.java
@@ -32,10 +32,17 @@ public class LegacyBloomFilterSerializer
// oos.flush();
}
- public LegacyBloomFilter deserialize(DataInputStream dis) throws IOException
+ public LegacyBloomFilter deserialize(final DataInput dis) throws IOException
{
int hashes = dis.readInt();
- ObjectInputStream ois = new ObjectInputStream(dis);
+ ObjectInputStream ois = new ObjectInputStream(new InputStream()
+ {
+ @Override
+ public int read() throws IOException
+ {
+ return dis.readByte() & 0xFF;
+ }
+ });
try
{
BitSet bs = (BitSet) ois.readObject();
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java b/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java
new file mode 100644
index 0000000..df5a160
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/Murmur2BloomFilter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.nio.ByteBuffer;
+
+import org.apache.cassandra.io.ISerializer;
+import org.apache.cassandra.utils.obs.OpenBitSet;
+
+public class Murmur2BloomFilter extends BloomFilter
+{
+ public static final ISerializer<BloomFilter> serializer = new Murmur2BloomFilterSerializer();
+
+ Murmur2BloomFilter(int hashes, long numElements, int bucketsPer)
+ {
+ super(hashes, numElements, bucketsPer);
+ }
+
+ private Murmur2BloomFilter(int hashes, OpenBitSet bs)
+ {
+ super(hashes, bs);
+ }
+
+ protected long[] hash(ByteBuffer b, int position, int remaining, long seed)
+ {
+ long hash1 = MurmurHash.hash2_64(b, b.position(), b.remaining(), seed);
+ long hash2 = MurmurHash.hash2_64(b, b.position(), b.remaining(), hash1);
+ return (new long[] { hash1, hash2 });
+ }
+
+ private static class Murmur2BloomFilterSerializer extends BloomFilterSerializer
+ {
+ protected BloomFilter createFilter(int hashes, OpenBitSet bs)
+ {
+ return new Murmur2BloomFilter(hashes, bs);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java b/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java
new file mode 100644
index 0000000..304842a
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/Murmur3BloomFilter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.nio.ByteBuffer;
+
+import org.apache.cassandra.io.ISerializer;
+import org.apache.cassandra.utils.obs.OpenBitSet;
+
+public class Murmur3BloomFilter extends BloomFilter
+{
+ public static final ISerializer<BloomFilter> serializer = new Murmur3BloomFilterSerializer();
+
+ Murmur3BloomFilter(int hashes, long numElements, int bucketsPer)
+ {
+ super(hashes, numElements, bucketsPer);
+ }
+
+ private Murmur3BloomFilter(int hashes, OpenBitSet bs)
+ {
+ super(hashes, bs);
+ }
+
+ protected long[] hash(ByteBuffer b, int position, int remaining, long seed)
+ {
+ return MurmurHash.hash3_x64_128(b, b.position(), b.remaining(), seed);
+ }
+
+ private static class Murmur3BloomFilterSerializer extends BloomFilterSerializer
+ {
+ protected BloomFilter createFilter(int hashes, OpenBitSet bs)
+ {
+ return new Murmur3BloomFilter(hashes, bs);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/src/java/org/apache/cassandra/utils/MurmurHash.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/MurmurHash.java b/src/java/org/apache/cassandra/utils/MurmurHash.java
index 36f9ab5..d2a3f37 100644
--- a/src/java/org/apache/cassandra/utils/MurmurHash.java
+++ b/src/java/org/apache/cassandra/utils/MurmurHash.java
@@ -23,6 +23,9 @@ import java.nio.ByteBuffer;
* This is a very fast, non-cryptographic hash suitable for general hash-based
* lookup. See http://murmurhash.googlepages.com/ for more details.
*
+ * hash32() and hash64() are MurmurHash 2.0.
+ * hash3_x64_128() is MurmurHash 3.0.
+ *
* <p>
* The C version of MurmurHash 2.0 found at that site was ported to Java by
* Andrzej Bialecki (ab at getopt org).
@@ -85,7 +88,7 @@ public class MurmurHash
return h;
}
- public static long hash64(ByteBuffer key, int offset, int length, long seed)
+ public static long hash2_64(ByteBuffer key, int offset, int length, long seed)
{
long m64 = 0xc6a4a7935bd1e995L;
int r64 = 47;
@@ -140,4 +143,106 @@ public class MurmurHash
return h64;
}
+
+ protected static long getblock(ByteBuffer key, int offset, int index)
+ {
+ int i_8 = index << 3;
+ return ((long) key.get(offset + i_8 + 0) & 0xff) + (((long) key.get(offset + i_8 + 1) & 0xff) << 8) +
+ (((long) key.get(offset + i_8 + 2) & 0xff) << 16) + (((long) key.get(offset + i_8 + 3) & 0xff) << 24) +
+ (((long) key.get(offset + i_8 + 4) & 0xff) << 32) + (((long) key.get(offset + i_8 + 5) & 0xff) << 40) +
+ (((long) key.get(offset + i_8 + 6) & 0xff) << 48) + (((long) key.get(offset + i_8 + 7) & 0xff) << 56);
+ }
+
+ protected static long rotl64(long v, int n)
+ {
+ return ((v << n) | (v >>> (64 - n)));
+ }
+
+ protected static long fmix(long k)
+ {
+ k ^= k >>> 33;
+ k *= 0xff51afd7ed558ccdL;
+ k ^= k >>> 33;
+ k *= 0xc4ceb9fe1a85ec53L;
+ k ^= k >>> 33;
+
+ return k;
+ }
+
+ public static long[] hash3_x64_128(ByteBuffer key, int offset, int length, long seed)
+ {
+ final int nblocks = length >> 4; // Process as 128-bit blocks.
+
+ long h1 = seed;
+ long h2 = seed;
+
+ long c1 = 0x87c37b91114253d5L;
+ long c2 = 0x4cf5ad432745937fL;
+
+ //----------
+ // body
+
+ for(int i = 0; i < nblocks; i++)
+ {
+ int i_8 = i << 4;
+
+ long k1 = getblock(key, offset, i*2+0);
+ long k2 = getblock(key, offset, i*2+1);
+
+ k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1;
+
+ h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+ k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2;
+
+ h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ // Advance offset to the unprocessed tail of the data.
+ offset += nblocks * 16;
+
+ long k1 = 0;
+ long k2 = 0;
+
+ switch(length & 15)
+ {
+ case 15: k2 ^= ((long) key.get(offset+14)) << 48;
+ case 14: k2 ^= ((long) key.get(offset+13)) << 40;
+ case 13: k2 ^= ((long) key.get(offset+12)) << 32;
+ case 12: k2 ^= ((long) key.get(offset+11)) << 24;
+ case 11: k2 ^= ((long) key.get(offset+10)) << 16;
+ case 10: k2 ^= ((long) key.get(offset+9)) << 8;
+ case 9: k2 ^= ((long) key.get(offset+8)) << 0;
+ k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2;
+
+ case 8: k1 ^= ((long) key.get(offset+7)) << 56;
+ case 7: k1 ^= ((long) key.get(offset+6)) << 48;
+ case 6: k1 ^= ((long) key.get(offset+5)) << 40;
+ case 5: k1 ^= ((long) key.get(offset+4)) << 32;
+ case 4: k1 ^= ((long) key.get(offset+3)) << 24;
+ case 3: k1 ^= ((long) key.get(offset+2)) << 16;
+ case 2: k1 ^= ((long) key.get(offset+1)) << 8;
+ case 1: k1 ^= ((long) key.get(offset));
+ k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= length; h2 ^= length;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ return(new long[] {h1, h2});
+ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
----------------------------------------------------------------------
diff --git a/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java b/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
index e5a9e22..d4a4c34 100644
--- a/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
+++ b/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
@@ -21,45 +21,82 @@ package org.apache.cassandra.utils;
import java.util.Random;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class LongBloomFilterTest
{
- public BloomFilter bf;
+ private static final Logger logger = LoggerFactory.getLogger(LongBloomFilterTest.class);
/**
* NB: needs to run with -mx1G
*/
- @Test
- public void testBigInt()
+ public void testBigInt(FilterFactory.Type type)
{
int size = 10 * 1000 * 1000;
- bf = BloomFilter.getFilter(size, FilterTestHelper.spec.bucketsPerElement);
- FilterTestHelper.testFalsePositives(bf,
- new KeyGenerator.IntGenerator(size),
- new KeyGenerator.IntGenerator(size, size * 2));
+ Filter bf = FilterFactory.getFilter(size, FilterTestHelper.spec.bucketsPerElement, type);
+ double fp = FilterTestHelper.testFalsePositives(bf, new KeyGenerator.IntGenerator(size),
+ new KeyGenerator.IntGenerator(size, size * 2));
+ logger.info("Bloom filter false positive: {}", fp);
}
- @Test
- public void testBigRandom()
+ public void testBigRandom(FilterFactory.Type type)
{
int size = 10 * 1000 * 1000;
- bf = BloomFilter.getFilter(size, FilterTestHelper.spec.bucketsPerElement);
- FilterTestHelper.testFalsePositives(bf,
- new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
- new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+ Filter bf = FilterFactory.getFilter(size, FilterTestHelper.spec.bucketsPerElement, type);
+ double fp = FilterTestHelper.testFalsePositives(bf, new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+ new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+ logger.info("Bloom filter false positive: {}", fp);
}
- @Test
- public void timeit()
+ public void timeit(FilterFactory.Type type)
{
int size = 300 * FilterTestHelper.ELEMENTS;
- bf = BloomFilter.getFilter(size, FilterTestHelper.spec.bucketsPerElement);
+ Filter bf = FilterFactory.getFilter(size, FilterTestHelper.spec.bucketsPerElement, type);
+ double sumfp = 0;
for (int i = 0; i < 10; i++)
{
- FilterTestHelper.testFalsePositives(bf,
- new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
- new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+ FilterTestHelper.testFalsePositives(bf, new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+ new KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+
bf.clear();
}
+ logger.info("Bloom filter mean false positive: {}", sumfp/10);
+ }
+
+ @Test
+ public void testBigIntMurm2()
+ {
+ testBigInt(FilterFactory.Type.MURMUR2);
+ }
+
+ @Test
+ public void testBigRandomMurm2()
+ {
+ testBigRandom(FilterFactory.Type.MURMUR2);
+ }
+
+ @Test
+ public void timeitMurm2()
+ {
+ timeit(FilterFactory.Type.MURMUR2);
+ }
+
+ @Test
+ public void testBigIntMurm3()
+ {
+ testBigInt(FilterFactory.Type.MURMUR3);
+ }
+
+ @Test
+ public void testBigRandomMurm3()
+ {
+ testBigRandom(FilterFactory.Type.MURMUR3);
+ }
+
+ @Test
+ public void timeitMurm3()
+ {
+ timeit(FilterFactory.Type.MURMUR3);
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java b/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
index 525ecde..6861007 100644
--- a/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/DescriptorTest.java
@@ -20,8 +20,9 @@ package org.apache.cassandra.io.sstable;
*
*/
-
+import org.apache.cassandra.utils.FilterFactory;
import org.junit.Test;
+import static org.junit.Assert.*;
public class DescriptorTest
{
@@ -31,7 +32,7 @@ public class DescriptorTest
Descriptor descriptor = Descriptor.fromFilename("Keyspace1-userActionUtilsKey-9-Data.db");
assert descriptor.version.equals(Descriptor.LEGACY_VERSION);
- assert descriptor.usesOldBloomFilter;
+ assert descriptor.filterType == FilterFactory.Type.SHA;
}
@Test
@@ -52,4 +53,16 @@ public class DescriptorTest
assert "gz".equals(desc.version);
assert !desc.tracksMaxTimestamp;
}
+
+ @Test
+ public void testMurmurBloomFilter()
+ {
+ Descriptor desc = Descriptor.fromFilename("Keyspace1-Standard1-ia-1-Data.db");
+ assertEquals("ia", desc.version);
+ assertEquals(desc.filterType, FilterFactory.Type.MURMUR2);
+
+ desc = Descriptor.fromFilename("Keyspace1-Standard1-ib-1-Data.db");
+ assertEquals("ib", desc.version);
+ assertEquals(desc.filterType, FilterFactory.Type.MURMUR3);
+ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/BloomFilterTest.java b/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
index 31a3369..d8f596f 100644
--- a/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
+++ b/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
@@ -33,21 +33,21 @@ import org.junit.Test;
public class BloomFilterTest
{
- public BloomFilter bf;
+ public Filter bf;
public BloomFilterTest()
{
- bf = BloomFilter.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE);
+ bf = FilterFactory.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE);
}
- public static BloomFilter testSerialize(BloomFilter f) throws IOException
+ public static Filter testSerialize(Filter f) throws IOException
{
f.add(ByteBufferUtil.bytes("a"));
DataOutputBuffer out = new DataOutputBuffer();
- f.serializer().serialize(f, out);
+ FilterFactory.serialize(f, out, FilterFactory.Type.MURMUR3);
ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, out.getLength());
- BloomFilter f2 = f.serializer().deserialize(new DataInputStream(in));
+ Filter f2 = FilterFactory.deserialize(new DataInputStream(in), FilterFactory.Type.MURMUR3);
assert f2.isPresent(ByteBufferUtil.bytes("a"));
assert !f2.isPresent(ByteBufferUtil.bytes("b"));
@@ -101,7 +101,7 @@ public class BloomFilterTest
{
return;
}
- BloomFilter bf2 = BloomFilter.getFilter(KeyGenerator.WordGenerator.WORDS / 2, FilterTestHelper.MAX_FAILURE_RATE);
+ Filter bf2 = FilterFactory.getFilter(KeyGenerator.WordGenerator.WORDS / 2, FilterTestHelper.MAX_FAILURE_RATE);
int skipEven = KeyGenerator.WordGenerator.WORDS % 2 == 0 ? 0 : 2;
FilterTestHelper.testFalsePositives(bf2,
new KeyGenerator.WordGenerator(skipEven, 2),
@@ -123,7 +123,8 @@ public class BloomFilterTest
{
hashes.clear();
ByteBuffer buf = keys.next();
- for (long hashIndex : BloomFilter.getHashBuckets(buf, MAX_HASH_COUNT, 1024 * 1024))
+ BloomFilter bf = (BloomFilter) FilterFactory.getFilter(10, 10);
+ for (long hashIndex : bf.getHashBuckets(buf, MAX_HASH_COUNT, 1024 * 1024))
{
hashes.add(hashIndex);
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/FilterTestHelper.java b/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
index 85294f6..fd6f873 100644
--- a/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
+++ b/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
@@ -43,7 +43,7 @@ public class FilterTestHelper
return new KeyGenerator.RandomStringGenerator(271828, ELEMENTS);
}
- public static void testFalsePositives(Filter f, ResetableIterator<ByteBuffer> keys, ResetableIterator<ByteBuffer> otherkeys)
+ public static double testFalsePositives(Filter f, ResetableIterator<ByteBuffer> keys, ResetableIterator<ByteBuffer> otherkeys)
{
assert keys.size() == otherkeys.size();
@@ -63,6 +63,7 @@ public class FilterTestHelper
double fp_ratio = fp / (keys.size() * BloomCalculations.probs[spec.bucketsPerElement][spec.K]);
assert fp_ratio < 1.03 : fp_ratio;
+ return fp_ratio;
}
public void testTrue()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java b/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
index fe52dda..248f325 100644
--- a/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
+++ b/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
@@ -43,10 +43,10 @@ public class LegacyBloomFilterTest
{
f.add(ByteBufferUtil.bytes("a"));
DataOutputBuffer out = new DataOutputBuffer();
- f.serializer().serialize(f, out);
+ FilterFactory.serialize(f, out, FilterFactory.Type.SHA);
ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, out.getLength());
- LegacyBloomFilter f2 = f.serializer().deserialize(new DataInputStream(in));
+ LegacyBloomFilter f2 = (LegacyBloomFilter) FilterFactory.deserialize(new DataInputStream(in), FilterFactory.Type.SHA);
assert f2.isPresent(ByteBufferUtil.bytes("a"));
assert !f2.isPresent(ByteBufferUtil.bytes("b"));
http://git-wip-us.apache.org/repos/asf/cassandra/blob/d765b246/test/unit/org/apache/cassandra/utils/SerializationsTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/SerializationsTest.java b/test/unit/org/apache/cassandra/utils/SerializationsTest.java
index d2c85c0..c30daef 100644
--- a/test/unit/org/apache/cassandra/utils/SerializationsTest.java
+++ b/test/unit/org/apache/cassandra/utils/SerializationsTest.java
@@ -23,6 +23,7 @@ package org.apache.cassandra.utils;
import org.apache.cassandra.AbstractSerializationsTester;
import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.FilterFactory.Type;
import org.junit.Test;
import java.io.DataInputStream;
@@ -33,24 +34,35 @@ import java.nio.ByteBuffer;
public class SerializationsTest extends AbstractSerializationsTester
{
- private void testBloomFilterWrite() throws IOException
+ private void testBloomFilterWrite(Type murmur) throws IOException
{
- BloomFilter bf = BloomFilter.getFilter(1000000, 0.0001);
+ Filter bf = FilterFactory.getFilter(1000000, 0.0001);
for (int i = 0; i < 100; i++)
bf.add(StorageService.getPartitioner().getTokenFactory().toByteArray(StorageService.getPartitioner().getRandomToken()));
DataOutputStream out = getOutput("utils.BloomFilter.bin");
- BloomFilter.serializer().serialize(bf, out);
+ FilterFactory.serialize(bf, out, murmur);
out.close();
}
@Test
- public void testBloomFilterRead() throws IOException
+ public void testBloomFilterReadMURMUR2() throws IOException
{
if (EXECUTE_WRITES)
- testBloomFilterWrite();
+ testBloomFilterWrite(FilterFactory.Type.MURMUR2);
DataInputStream in = getInput("utils.BloomFilter.bin");
- assert BloomFilter.serializer().deserialize(in) != null;
+ assert FilterFactory.deserialize(in, FilterFactory.Type.MURMUR2) != null;
+ in.close();
+ }
+
+ @Test
+ public void testBloomFilterReadMURMUR3() throws IOException
+ {
+ if (EXECUTE_WRITES)
+ testBloomFilterWrite(FilterFactory.Type.MURMUR3);
+
+ DataInputStream in = getInput("utils.BloomFilter.bin");
+ assert FilterFactory.deserialize(in, FilterFactory.Type.MURMUR3) != null;
in.close();
}
@@ -65,8 +77,8 @@ public class SerializationsTest extends AbstractSerializationsTester
b.add(key);
}
DataOutputStream out = getOutput("utils.LegacyBloomFilter.bin");
- LegacyBloomFilter.serializer().serialize(a, out);
- LegacyBloomFilter.serializer().serialize(b, out);
+ FilterFactory.serialize(a, out, FilterFactory.Type.SHA);
+ FilterFactory.serialize(b, out, FilterFactory.Type.SHA);
out.close();
}
@@ -77,7 +89,7 @@ public class SerializationsTest extends AbstractSerializationsTester
testLegacyBloomFilterWrite();
DataInputStream in = getInput("utils.LegacyBloomFilter.bin");
- assert LegacyBloomFilter.serializer().deserialize(in) != null;
+ assert FilterFactory.deserialize(in, FilterFactory.Type.SHA) != null;
in.close();
}