You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2010/03/20 00:19:14 UTC
svn commit: r925479 - in /hadoop/common/trunk: CHANGES.txt
src/java/org/apache/hadoop/io/BloomMapFile.java
src/test/core/org/apache/hadoop/io/TestBloomMapFile.java
Author: tomwhite
Date: Fri Mar 19 23:19:13 2010
New Revision: 925479
URL: http://svn.apache.org/viewvc?rev=925479&view=rev
Log:
HADOOP-6546. BloomMapFile can return false negatives. Contributed by Clark Jefcoat.
Modified:
hadoop/common/trunk/CHANGES.txt
hadoop/common/trunk/src/java/org/apache/hadoop/io/BloomMapFile.java
hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBloomMapFile.java
Modified: hadoop/common/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=925479&r1=925478&r2=925479&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Fri Mar 19 23:19:13 2010
@@ -281,6 +281,9 @@ Trunk (unreleased changes)
HADOOP-6504. Invalid example in the documentation of
org.apache.hadoop.util.Tool. (Benoit Sigoure via tomwhite)
+ HADOOP-6546. BloomMapFile can return false negatives. (Clark Jefcoat
+ via tomwhite)
+
Release 0.21.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/BloomMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/BloomMapFile.java?rev=925479&r1=925478&r2=925479&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/io/BloomMapFile.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/io/BloomMapFile.java Fri Mar 19 23:19:13 2010
@@ -58,6 +58,16 @@ public class BloomMapFile {
fs.delete(bloom, true);
fs.delete(dir, true);
}
+
+ private static byte[] byteArrayForBloomKey(DataOutputBuffer buf) {
+ int cleanLength = buf.getLength();
+ byte [] ba = buf.getData();
+ if (cleanLength != ba.length) {
+ ba = new byte[cleanLength];
+ System.arraycopy(buf.getData(), 0, ba, 0, cleanLength);
+ }
+ return ba;
+ }
public static class Writer extends MapFile.Writer {
private DynamicBloomFilter bloomFilter;
@@ -163,7 +173,7 @@ public class BloomMapFile {
super.append(key, val);
buf.reset();
key.write(buf);
- bloomKey.set(buf.getData(), 1.0);
+ bloomKey.set(byteArrayForBloomKey(buf), 1.0);
bloomFilter.add(bloomKey);
}
@@ -228,7 +238,7 @@ public class BloomMapFile {
}
buf.reset();
key.write(buf);
- bloomKey.set(buf.getData(), 1.0);
+ bloomKey.set(byteArrayForBloomKey(buf), 1.0);
return bloomFilter.membershipTest(bloomKey);
}
Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBloomMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBloomMapFile.java?rev=925479&r1=925478&r2=925479&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBloomMapFile.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestBloomMapFile.java Fri Mar 19 23:19:13 2010
@@ -18,6 +18,10 @@
package org.apache.hadoop.io;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -67,4 +71,41 @@ public class TestBloomMapFile extends Te
assertTrue(falsePos < 2);
}
+ private void checkMembershipVaryingSizedKeys(String name, List<Text> keys) throws Exception {
+ Path dirName = new Path(System.getProperty("test.build.data",".") +
+ name + ".bloommapfile");
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path qualifiedDirName = fs.makeQualified(dirName);
+ BloomMapFile.Writer writer = new BloomMapFile.Writer(conf, fs,
+ qualifiedDirName.toString(), Text.class, NullWritable.class);
+ for (Text key : keys) {
+ writer.append(key, NullWritable.get());
+ }
+ writer.close();
+
+ // will check for membership in the opposite order of how keys were inserted
+ BloomMapFile.Reader reader = new BloomMapFile.Reader(fs,
+ qualifiedDirName.toString(), conf);
+ Collections.reverse(keys);
+ for (Text key : keys) {
+ assertTrue("False negative for existing key " + key, reader.probablyHasKey(key));
+ }
+ reader.close();
+ fs.delete(qualifiedDirName, true);
+ }
+
+ public void testMembershipVaryingSizedKeysTest1() throws Exception {
+ ArrayList<Text> list = new ArrayList<Text>();
+ list.add(new Text("A"));
+ list.add(new Text("BB"));
+ checkMembershipVaryingSizedKeys(getName(), list);
+ }
+
+ public void testMembershipVaryingSizedKeysTest2() throws Exception {
+ ArrayList<Text> list = new ArrayList<Text>();
+ list.add(new Text("AA"));
+ list.add(new Text("B"));
+ checkMembershipVaryingSizedKeys(getName(), list);
+ }
+
}