You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/02/07 21:10:43 UTC

svn commit: r619620 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/onelab/filter/CountingBloomFilter.java src/test/org/onelab/test/TestFilter.java

Author: stack
Date: Thu Feb  7 12:10:40 2008
New Revision: 619620

URL: http://svn.apache.org/viewvc?rev=619620&view=rev
Log:
HBASE-19 CountingBloomFilter can overflow its storage

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/onelab/filter/CountingBloomFilter.java
    hadoop/hbase/trunk/src/test/org/onelab/test/TestFilter.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=619620&r1=619619&r2=619620&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Feb  7 12:10:40 2008
@@ -9,6 +9,8 @@
   OPTIMIZATIONS
 
   BUG FIXES
+   HBASE-19    CountingBloomFilter can overflow its storage
+               (Stu Hood and Bryan Duxbury via Stack)
 
   IMPROVEMENTS
    HBASE-415   Rewrite leases to use DelayedBlockingQueue instead of polling

Modified: hadoop/hbase/trunk/src/java/org/onelab/filter/CountingBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/onelab/filter/CountingBloomFilter.java?rev=619620&r1=619619&r2=619620&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/onelab/filter/CountingBloomFilter.java (original)
+++ hadoop/hbase/trunk/src/java/org/onelab/filter/CountingBloomFilter.java Thu Feb  7 12:10:40 2008
@@ -50,6 +50,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.Arrays;
 
 /**
  * Implements a <i>counting Bloom filter</i>, as defined by Fan et al. in a ToN
@@ -61,15 +62,18 @@
  * 
  * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
  *
- * @version 1.0 - 5 Feb. 07
+ * @version 1.1 - 19 Jan. 08
  * 
  * @see org.onelab.filter.Filter The general behavior of a filter
  * 
  * @see <a href="http://portal.acm.org/citation.cfm?id=343571.343572">Summary cache: a scalable wide-area web cache sharing protocol</a>
  */
 public final class CountingBloomFilter extends Filter {
-  /** Counter vector. */
-  private byte[] vector;
+  /** Storage for the counting buckets */
+  private long[] buckets;
+
+  /** We are using 4bit buckets, so each bucket can count to 15 */
+  private final static long BUCKET_MAX_VALUE = 15;
 
   /** Default constructor - use with readFields */
   public CountingBloomFilter() {}
@@ -81,9 +85,15 @@
    */
   public CountingBloomFilter(int vectorSize, int nbHash){
     super(vectorSize, nbHash);
-    vector = new byte[vectorSize];
+    buckets = new long[buckets2words(vectorSize)];
   }//end constructor
 
+  /** returns the number of 64 bit words it would take to hold vectorSize buckets */
+  private static int buckets2words(int vectorSize) {
+   return ((vectorSize - 1) >>> 4) + 1;
+  }
+
+
   /** {@inheritDoc} */
   @Override
   public void add(Key key) {
@@ -95,7 +105,18 @@
     hash.clear();
 
     for(int i = 0; i < nbHash; i++) {
-      vector[h[i]]++;
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      
+      // only increment if the count in the bucket is less than BUCKET_MAX_VALUE
+      if(bucketValue < BUCKET_MAX_VALUE) {
+        // increment by 1
+        buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift);
+      }
     }
   }//end add()
 
@@ -117,8 +138,17 @@
     hash.clear();
 
     for(int i = 0; i < nbHash; i++) {
-      if(vector[h[i]] >= 1) {
-        vector[h[i]]--;
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      
+      // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE
+      if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) {
+        // decrement by 1
+        buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift);
       }
     }
   }//end delete
@@ -133,9 +163,10 @@
       throw new IllegalArgumentException("filters cannot be and-ed");
     }
     CountingBloomFilter cbf = (CountingBloomFilter)filter;
-
-    for(int i = 0; i < vectorSize; i++) {
-      this.vector[i] &= cbf.vector[i];
+    
+    int sizeInWords = buckets2words(vectorSize);
+    for(int i = 0; i < sizeInWords; i++) {
+      this.buckets[i] &= cbf.buckets[i];
     }
   }//end and()
 
@@ -150,7 +181,13 @@
     hash.clear();
 
     for(int i = 0; i < nbHash; i++) {
-      if(vector[h[i]] == 0) {
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+
+      long bucketMask = 15L << bucketShift;
+
+      if((buckets[wordNum] & bucketMask) == 0) {
         return false;
       }
     }
@@ -177,8 +214,9 @@
 
     CountingBloomFilter cbf = (CountingBloomFilter)filter;
 
-    for(int i = 0; i < vectorSize; i++) {
-      this.vector[i] |= cbf.vector[i];
+    int sizeInWords = buckets2words(vectorSize);
+    for(int i = 0; i < sizeInWords; i++) {
+      this.buckets[i] |= cbf.buckets[i];
     }
   }//end or()
 
@@ -199,7 +237,14 @@
       if(i > 0) {
         res.append(" ");
       }
-      res.append(vector[i]&0xff);
+      
+      int wordNum = i >> 4;          // div 16
+      int bucketShift = (i & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      
+      res.append(bucketValue);
     }
 
     return res.toString();
@@ -209,7 +254,7 @@
   @Override
   public Object clone(){
     CountingBloomFilter cbf = new CountingBloomFilter(vectorSize, nbHash);
-    cbf.or(this);
+    cbf.buckets = this.buckets.clone();
     return cbf;
   }//end clone()
 
@@ -219,8 +264,9 @@
   @Override
   public void write(DataOutput out) throws IOException {
     super.write(out);
-    for(int i = 0; i < vector.length; i++) {
-      out.writeByte(vector[i]);
+    int sizeInWords = buckets2words(vectorSize);
+    for(int i = 0; i < sizeInWords; i++) {
+      out.writeLong(buckets[i]);
     }
   }
 
@@ -228,9 +274,10 @@
   @Override
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
-    vector = new byte[vectorSize];
-    for(int i = 0; i < vector.length; i++) {
-      vector[i] = in.readByte();
+    int sizeInWords = buckets2words(vectorSize);
+    buckets = new long[sizeInWords];
+    for(int i = 0; i < sizeInWords; i++) {
+      buckets[i] = in.readLong();
     }
   }
 }//end class

Modified: hadoop/hbase/trunk/src/test/org/onelab/test/TestFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/onelab/test/TestFilter.java?rev=619620&r1=619619&r2=619620&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/onelab/test/TestFilter.java (original)
+++ hadoop/hbase/trunk/src/test/org/onelab/test/TestFilter.java Thu Feb  7 12:10:40 2008
@@ -94,6 +94,27 @@
     assertTrue(bf.membershipTest(new StringKey("graknyl")));
     assertFalse(bf.membershipTest(new StringKey("xyzzy")));
     assertFalse(bf.membershipTest(new StringKey("abcd")));
+
+    // delete 'key', and check that it is no longer a member
+    ((CountingBloomFilter)bf).delete(key);
+    assertFalse(bf.membershipTest(key));
+    
+    // OR 'key' back into the filter
+    Filter bf2 = new CountingBloomFilter(8, 2);
+    bf2.add(key);
+    bf.or(bf2);
+    assertTrue(bf.membershipTest(key));
+    assertTrue(bf.membershipTest(new StringKey("graknyl")));
+    assertFalse(bf.membershipTest(new StringKey("xyzzy")));
+    assertFalse(bf.membershipTest(new StringKey("abcd")));
+    
+    // to test for overflows, add 'key' enough times to overflow an 8bit bucket,
+    // while asserting that it stays a member
+    for(int i = 0; i < 257; i++){
+      bf.add(key);
+      assertTrue(bf.membershipTest(key));
+    }
+    
   }
   
   /** Test a DynamicBloomFilter