You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:06:54 UTC

svn commit: r1181418 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java

Author: nspiegelberg
Date: Tue Oct 11 02:06:53 2011
New Revision: 1181418

URL: http://svn.apache.org/viewvc?rev=1181418&view=rev
Log:
HBASE-3158: Fix Large Bloom Sizes

Summary:
1) Catch IllegalArgumentExceptions thrown by the BloomFilter and just continue
writing the StoreFile without blooms instead of crashing
2) ByteBloomFilter.<init>.bitSize should be a long and we should be able to
handle large bloom sizes
3) We cannot handle byteSize > MAX_INT because java doesn't allow you to
allocate arrays that large. Throw an IAE in that case

Test Plan:
mvn clean instal -Dtest=TestStoreFile

DiffCamp Revision: 174659
Reviewed By: kannan
CC: kannan
Revert Plan:
OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181418&r1=1181417&r2=1181418&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:06:53 2011
@@ -83,6 +83,7 @@ public class StoreFile {
   // Config keys.
   static final String IO_STOREFILE_BLOOM_ERROR_RATE = "io.storefile.bloom.error.rate";
   static final String IO_STOREFILE_BLOOM_MAX_FOLD = "io.storefile.bloom.max.fold";
+  static final String IO_STOREFILE_BLOOM_MAX_KEYS = "io.storefile.bloom.max.keys";
   static final String IO_STOREFILE_BLOOM_ENABLED = "io.storefile.bloom.enabled";
   static final String HFILE_BLOCK_CACHE_SIZE_KEY = "hfile.block.cache.size";
 
@@ -706,18 +707,26 @@ public class StoreFile {
           err /= 2;
         }
         int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, 7);
+        int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128*1000*1000);
 
-        try {
-          bloom = new ByteBloomFilter(maxKeys, err,
-              Hash.getHashType(conf), maxFold);
-          bloom.allocBloom();
-          bt = bloomType;
-        } catch (IllegalArgumentException iae) {
-          LOG.error(String.format(
-            "Parse error while creating bloom for %s (%d, %f)",
-            path, maxKeys, err), iae);
-          bloom = null;
-          bt = BloomType.NONE;
+        if (maxKeys < tooBig) {
+          try {
+            bloom = new ByteBloomFilter(maxKeys, err,
+                Hash.getHashType(conf), maxFold);
+            bloom.allocBloom();
+            bt = bloomType;
+          } catch (IllegalArgumentException iae) {
+            LOG.warn(String.format(
+              "Parse error while creating bloom for %s (%d, %f)",
+              path, maxKeys, err), iae);
+            bloom = null;
+            bt = BloomType.NONE;
+          }
+        } else {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Skipping bloom filter because max keysize too large: "
+                + maxKeys);
+          }
         }
       }
 
@@ -834,6 +843,10 @@ public class StoreFile {
       return this.writer.getPath();
     }
 
+    boolean hasBloom() {
+      return this.bloomFilter != null;
+    }
+
     public void append(final byte [] key, final byte [] value) throws IOException {
       if (this.bloomFilter != null) {
         // only add to the bloom filter on a new row

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java?rev=1181418&r1=1181417&r2=1181418&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java Tue Oct 11 02:06:53 2011
@@ -52,7 +52,7 @@ public class ByteBloomFilter implements 
   public static final int VERSION = 1;
 
   /** Bytes (B) in the array */
-  protected int byteSize;
+  protected long byteSize;
   /** Number of hash functions */
   protected final int hashCount;
   /** Hash type */
@@ -134,11 +134,11 @@ public class ByteBloomFilter implements 
      *
      * The probability of false positives is minimized when k = m/n ln(2).
      */
-    int bitSize = (int)Math.ceil(maxKeys * (Math.log(errorRate) / Math.log(0.6185)));
+    long bitSize = (long)Math.ceil(maxKeys * (Math.log(errorRate) / Math.log(0.6185)));
     int functionCount = (int)Math.ceil(Math.log(2) * (bitSize / maxKeys));
 
     // increase byteSize so folding is possible
-    int byteSize = (bitSize + 7) / 8;
+    long byteSize = (bitSize + 7) / 8;
     int mask = (1 << foldFactor) - 1;
     if ( (mask & byteSize) != 0) {
       byteSize >>= foldFactor;
@@ -161,13 +161,13 @@ public class ByteBloomFilter implements 
     if (this.bloom != null) {
       throw new IllegalArgumentException("can only create bloom once.");
     }
-    this.bloom = ByteBuffer.allocate(this.byteSize);
+    this.bloom = ByteBuffer.allocate((int)this.byteSize);
     assert this.bloom.hasArray();
   }
 
   void sanityCheck() throws IllegalArgumentException {
-    if(this.byteSize <= 0) {
-      throw new IllegalArgumentException("maxValue must be > 0");
+    if(0 >= this.byteSize || this.byteSize > Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("Invalid byteSize: " + this.byteSize);
     }
 
     if(this.hashCount <= 0) {
@@ -205,7 +205,7 @@ public class ByteBloomFilter implements 
     int hash2 = this.hash.hash(buf, offset, len, hash1);
 
     for (int i = 0; i < this.hashCount; i++) {
-      int hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
+      long hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
       set(hashLoc);
     }
 
@@ -243,7 +243,7 @@ public class ByteBloomFilter implements 
     int hash2 = this.hash.hash(buf, offset, length, hash1);
 
     for (int i = 0; i < this.hashCount; i++) {
-      int hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
+      long hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
       if (!get(hashLoc, theBloom) ) {
         return false;
       }
@@ -259,9 +259,9 @@ public class ByteBloomFilter implements 
    *
    * @param pos index of bit
    */
-  void set(int pos) {
-    int bytePos = pos / 8;
-    int bitPos = pos % 8;
+  void set(long pos) {
+    int bytePos = (int)(pos / 8);
+    int bitPos = (int)(pos % 8);
     byte curByte = bloom.get(bytePos);
     curByte |= bitvals[bitPos];
     bloom.put(bytePos, curByte);
@@ -273,9 +273,9 @@ public class ByteBloomFilter implements 
    * @param pos index of bit
    * @return true if bit at specified index is 1, false if 0.
    */
-  static boolean get(int pos, ByteBuffer theBloom) {
-    int bytePos = pos / 8;
-    int bitPos = pos % 8;
+  static boolean get(long pos, ByteBuffer theBloom) {
+    int bytePos = (int)(pos / 8);
+    int bitPos = (int)(pos % 8);
     byte curByte = theBloom.get(bytePos);
     curByte &= bitvals[bitPos];
     return (curByte != 0);
@@ -293,7 +293,7 @@ public class ByteBloomFilter implements 
 
   @Override
   public int getByteSize() {
-    return this.byteSize;
+    return (int)this.byteSize;
   }
 
   @Override
@@ -301,7 +301,7 @@ public class ByteBloomFilter implements 
     // see if the actual size is exponentially smaller than expected.
     if (this.keyCount > 0 && this.bloom.hasArray()) {
       int pieces = 1;
-      int newByteSize = this.byteSize;
+      int newByteSize = (int)this.byteSize;
       int newMaxKeys = this.maxKeys;
 
       // while exponentially smaller & folding is lossless
@@ -367,7 +367,7 @@ public class ByteBloomFilter implements 
     @Override
     public void write(DataOutput out) throws IOException {
       out.writeInt(VERSION);
-      out.writeInt(byteSize);
+      out.writeInt((int)byteSize);
       out.writeInt(hashCount);
       out.writeInt(hashType);
       out.writeInt(keyCount);

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java?rev=1181418&r1=1181417&r2=1181418&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java Tue Oct 11 02:06:53 2011
@@ -40,7 +40,9 @@ import org.apache.hadoop.hbase.client.Sc
 import org.apache.hadoop.hbase.io.Reference.Range;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.util.ByteBloomFilter;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Hash;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.mockito.Mockito;
 
@@ -323,17 +325,10 @@ public class TestStoreFile extends HBase
     HBaseTestingUtility.getTestDir("TestStoreFile").toString();
   private static String localFormatter = "%010d";
 
-  public void testBloomFilter() throws Exception {
-    FileSystem fs = FileSystem.getLocal(conf);
-    conf.setFloat("io.hfile.bloom.error.rate", (float)0.01);
-    conf.setBoolean("io.hfile.bloom.enabled", true);
-
-    // write the file
-    Path f = new Path(ROOT_DIR, getName());
-    StoreFile.Writer writer = new StoreFile.Writer(fs, f,
-        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
-        conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
-
+  private void bloomWriteRead(StoreFile.Writer writer, FileSystem fs)
+  throws Exception {
+    float err = conf.getFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
+    Path f = writer.getPath();
     long now = System.currentTimeMillis();
     for (int i = 0; i < 2000; i += 2) {
       String row = String.format(localFormatter, i);
@@ -370,14 +365,31 @@ public class TestStoreFile extends HBase
     System.out.println("False negatives: " + falseNeg);
     assertEquals(0, falseNeg);
     System.out.println("False positives: " + falsePos);
-    assertTrue(falsePos < 2);
+    if (!(falsePos <= 2* 2000 * err)) {
+      System.out.println("WTFBBQ! " + falsePos + ", " + (2* 2000 * err) );
+    }
+    assertTrue(falsePos <= 2* 2000 * err);
+  }
+
+  public void testBloomFilter() throws Exception {
+    FileSystem fs = FileSystem.getLocal(conf);
+    conf.setFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, (float)0.01);
+    conf.setBoolean(StoreFile.IO_STOREFILE_BLOOM_ENABLED, true);
+
+    // write the file
+    Path f = new Path(ROOT_DIR, getName());
+    StoreFile.Writer writer = new StoreFile.Writer(fs, f,
+        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+        conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
+
+    bloomWriteRead(writer, fs);
   }
 
   public void testBloomTypes() throws Exception {
     float err = (float) 0.01;
     FileSystem fs = FileSystem.getLocal(conf);
-    conf.setFloat("io.hfile.bloom.error.rate", err);
-    conf.setBoolean("io.hfile.bloom.enabled", true);
+    conf.setFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, err);
+    conf.setBoolean(StoreFile.IO_STOREFILE_BLOOM_ENABLED, true);
 
     int rowCount = 50;
     int colCount = 10;
@@ -455,6 +467,41 @@ public class TestStoreFile extends HBase
     }
   }
   
+  public void testBloomEdgeCases() throws Exception {
+    float err = (float)0.005;
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path f = new Path(ROOT_DIR, getName());
+    conf.setFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, err);
+    conf.setBoolean(StoreFile.IO_STOREFILE_BLOOM_ENABLED, true);
+    conf.setInt(StoreFile.IO_STOREFILE_BLOOM_MAX_KEYS, 1000);
+
+    // this should not create a bloom because the max keys is too small
+    StoreFile.Writer writer = new StoreFile.Writer(fs, f,
+        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+        conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
+    assertFalse(writer.hasBloom());
+    writer.close();
+    fs.delete(f, true);
+
+    conf.setInt(StoreFile.IO_STOREFILE_BLOOM_MAX_KEYS, Integer.MAX_VALUE);
+    // the below config caused IllegalArgumentException in our production cluster
+    // however, the resulting byteSize is < MAX_INT, so this should work properly
+    writer = new StoreFile.Writer(fs, f,
+        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+        conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 272446963);
+    assertTrue(writer.hasBloom());
+    bloomWriteRead(writer, fs);
+
+    // this, however, is too large and should not create a bloom
+    // because Java can't create a contiguous array > MAX_INT
+    writer = new StoreFile.Writer(fs, f,
+        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+        conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, Integer.MAX_VALUE);
+    assertFalse(writer.hasBloom());
+    writer.close();
+    fs.delete(f, true);
+  }
+
   public void testFlushTimeComparator() {
     assertOrdering(StoreFile.Comparators.FLUSH_TIME,
         mockStoreFile(true, 1000, -1, "/foo/123"),