You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:06:54 UTC
svn commit: r1181418 - in /hbase/branches/0.89/src:
main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
Author: nspiegelberg
Date: Tue Oct 11 02:06:53 2011
New Revision: 1181418
URL: http://svn.apache.org/viewvc?rev=1181418&view=rev
Log:
HBASE-3158: Fix Large Bloom Sizes
Summary:
1) Catch IllegalArgumentExceptions thrown by the BloomFilter and just continue
writing the StoreFile without blooms instead of crashing
2) ByteBloomFilter.<init>.bitSize should be a long and we should be able to
handle large bloom sizes
3) We cannot handle byteSize > MAX_INT because java doesn't allow you to
allocate arrays that large. Throw an IAE in that case
Test Plan:
mvn clean instal -Dtest=TestStoreFile
DiffCamp Revision: 174659
Reviewed By: kannan
CC: kannan
Revert Plan:
OK
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181418&r1=1181417&r2=1181418&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:06:53 2011
@@ -83,6 +83,7 @@ public class StoreFile {
// Config keys.
static final String IO_STOREFILE_BLOOM_ERROR_RATE = "io.storefile.bloom.error.rate";
static final String IO_STOREFILE_BLOOM_MAX_FOLD = "io.storefile.bloom.max.fold";
+ static final String IO_STOREFILE_BLOOM_MAX_KEYS = "io.storefile.bloom.max.keys";
static final String IO_STOREFILE_BLOOM_ENABLED = "io.storefile.bloom.enabled";
static final String HFILE_BLOCK_CACHE_SIZE_KEY = "hfile.block.cache.size";
@@ -706,18 +707,26 @@ public class StoreFile {
err /= 2;
}
int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, 7);
+ int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS, 128*1000*1000);
- try {
- bloom = new ByteBloomFilter(maxKeys, err,
- Hash.getHashType(conf), maxFold);
- bloom.allocBloom();
- bt = bloomType;
- } catch (IllegalArgumentException iae) {
- LOG.error(String.format(
- "Parse error while creating bloom for %s (%d, %f)",
- path, maxKeys, err), iae);
- bloom = null;
- bt = BloomType.NONE;
+ if (maxKeys < tooBig) {
+ try {
+ bloom = new ByteBloomFilter(maxKeys, err,
+ Hash.getHashType(conf), maxFold);
+ bloom.allocBloom();
+ bt = bloomType;
+ } catch (IllegalArgumentException iae) {
+ LOG.warn(String.format(
+ "Parse error while creating bloom for %s (%d, %f)",
+ path, maxKeys, err), iae);
+ bloom = null;
+ bt = BloomType.NONE;
+ }
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Skipping bloom filter because max keysize too large: "
+ + maxKeys);
+ }
}
}
@@ -834,6 +843,10 @@ public class StoreFile {
return this.writer.getPath();
}
+ boolean hasBloom() {
+ return this.bloomFilter != null;
+ }
+
public void append(final byte [] key, final byte [] value) throws IOException {
if (this.bloomFilter != null) {
// only add to the bloom filter on a new row
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java?rev=1181418&r1=1181417&r2=1181418&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java Tue Oct 11 02:06:53 2011
@@ -52,7 +52,7 @@ public class ByteBloomFilter implements
public static final int VERSION = 1;
/** Bytes (B) in the array */
- protected int byteSize;
+ protected long byteSize;
/** Number of hash functions */
protected final int hashCount;
/** Hash type */
@@ -134,11 +134,11 @@ public class ByteBloomFilter implements
*
* The probability of false positives is minimized when k = m/n ln(2).
*/
- int bitSize = (int)Math.ceil(maxKeys * (Math.log(errorRate) / Math.log(0.6185)));
+ long bitSize = (long)Math.ceil(maxKeys * (Math.log(errorRate) / Math.log(0.6185)));
int functionCount = (int)Math.ceil(Math.log(2) * (bitSize / maxKeys));
// increase byteSize so folding is possible
- int byteSize = (bitSize + 7) / 8;
+ long byteSize = (bitSize + 7) / 8;
int mask = (1 << foldFactor) - 1;
if ( (mask & byteSize) != 0) {
byteSize >>= foldFactor;
@@ -161,13 +161,13 @@ public class ByteBloomFilter implements
if (this.bloom != null) {
throw new IllegalArgumentException("can only create bloom once.");
}
- this.bloom = ByteBuffer.allocate(this.byteSize);
+ this.bloom = ByteBuffer.allocate((int)this.byteSize);
assert this.bloom.hasArray();
}
void sanityCheck() throws IllegalArgumentException {
- if(this.byteSize <= 0) {
- throw new IllegalArgumentException("maxValue must be > 0");
+ if(0 >= this.byteSize || this.byteSize > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("Invalid byteSize: " + this.byteSize);
}
if(this.hashCount <= 0) {
@@ -205,7 +205,7 @@ public class ByteBloomFilter implements
int hash2 = this.hash.hash(buf, offset, len, hash1);
for (int i = 0; i < this.hashCount; i++) {
- int hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
+ long hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
set(hashLoc);
}
@@ -243,7 +243,7 @@ public class ByteBloomFilter implements
int hash2 = this.hash.hash(buf, offset, length, hash1);
for (int i = 0; i < this.hashCount; i++) {
- int hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
+ long hashLoc = Math.abs((hash1 + i * hash2) % (this.byteSize * 8));
if (!get(hashLoc, theBloom) ) {
return false;
}
@@ -259,9 +259,9 @@ public class ByteBloomFilter implements
*
* @param pos index of bit
*/
- void set(int pos) {
- int bytePos = pos / 8;
- int bitPos = pos % 8;
+ void set(long pos) {
+ int bytePos = (int)(pos / 8);
+ int bitPos = (int)(pos % 8);
byte curByte = bloom.get(bytePos);
curByte |= bitvals[bitPos];
bloom.put(bytePos, curByte);
@@ -273,9 +273,9 @@ public class ByteBloomFilter implements
* @param pos index of bit
* @return true if bit at specified index is 1, false if 0.
*/
- static boolean get(int pos, ByteBuffer theBloom) {
- int bytePos = pos / 8;
- int bitPos = pos % 8;
+ static boolean get(long pos, ByteBuffer theBloom) {
+ int bytePos = (int)(pos / 8);
+ int bitPos = (int)(pos % 8);
byte curByte = theBloom.get(bytePos);
curByte &= bitvals[bitPos];
return (curByte != 0);
@@ -293,7 +293,7 @@ public class ByteBloomFilter implements
@Override
public int getByteSize() {
- return this.byteSize;
+ return (int)this.byteSize;
}
@Override
@@ -301,7 +301,7 @@ public class ByteBloomFilter implements
// see if the actual size is exponentially smaller than expected.
if (this.keyCount > 0 && this.bloom.hasArray()) {
int pieces = 1;
- int newByteSize = this.byteSize;
+ int newByteSize = (int)this.byteSize;
int newMaxKeys = this.maxKeys;
// while exponentially smaller & folding is lossless
@@ -367,7 +367,7 @@ public class ByteBloomFilter implements
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(VERSION);
- out.writeInt(byteSize);
+ out.writeInt((int)byteSize);
out.writeInt(hashCount);
out.writeInt(hashType);
out.writeInt(keyCount);
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java?rev=1181418&r1=1181417&r2=1181418&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java Tue Oct 11 02:06:53 2011
@@ -40,7 +40,9 @@ import org.apache.hadoop.hbase.client.Sc
import org.apache.hadoop.hbase.io.Reference.Range;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.mockito.Mockito;
@@ -323,17 +325,10 @@ public class TestStoreFile extends HBase
HBaseTestingUtility.getTestDir("TestStoreFile").toString();
private static String localFormatter = "%010d";
- public void testBloomFilter() throws Exception {
- FileSystem fs = FileSystem.getLocal(conf);
- conf.setFloat("io.hfile.bloom.error.rate", (float)0.01);
- conf.setBoolean("io.hfile.bloom.enabled", true);
-
- // write the file
- Path f = new Path(ROOT_DIR, getName());
- StoreFile.Writer writer = new StoreFile.Writer(fs, f,
- StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
- conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
-
+ private void bloomWriteRead(StoreFile.Writer writer, FileSystem fs)
+ throws Exception {
+ float err = conf.getFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
+ Path f = writer.getPath();
long now = System.currentTimeMillis();
for (int i = 0; i < 2000; i += 2) {
String row = String.format(localFormatter, i);
@@ -370,14 +365,31 @@ public class TestStoreFile extends HBase
System.out.println("False negatives: " + falseNeg);
assertEquals(0, falseNeg);
System.out.println("False positives: " + falsePos);
- assertTrue(falsePos < 2);
+ if (!(falsePos <= 2* 2000 * err)) {
+ System.out.println("WTFBBQ! " + falsePos + ", " + (2* 2000 * err) );
+ }
+ assertTrue(falsePos <= 2* 2000 * err);
+ }
+
+ public void testBloomFilter() throws Exception {
+ FileSystem fs = FileSystem.getLocal(conf);
+ conf.setFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, (float)0.01);
+ conf.setBoolean(StoreFile.IO_STOREFILE_BLOOM_ENABLED, true);
+
+ // write the file
+ Path f = new Path(ROOT_DIR, getName());
+ StoreFile.Writer writer = new StoreFile.Writer(fs, f,
+ StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+ conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
+
+ bloomWriteRead(writer, fs);
}
public void testBloomTypes() throws Exception {
float err = (float) 0.01;
FileSystem fs = FileSystem.getLocal(conf);
- conf.setFloat("io.hfile.bloom.error.rate", err);
- conf.setBoolean("io.hfile.bloom.enabled", true);
+ conf.setFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, err);
+ conf.setBoolean(StoreFile.IO_STOREFILE_BLOOM_ENABLED, true);
int rowCount = 50;
int colCount = 10;
@@ -455,6 +467,41 @@ public class TestStoreFile extends HBase
}
}
+ public void testBloomEdgeCases() throws Exception {
+ float err = (float)0.005;
+ FileSystem fs = FileSystem.getLocal(conf);
+ Path f = new Path(ROOT_DIR, getName());
+ conf.setFloat(StoreFile.IO_STOREFILE_BLOOM_ERROR_RATE, err);
+ conf.setBoolean(StoreFile.IO_STOREFILE_BLOOM_ENABLED, true);
+ conf.setInt(StoreFile.IO_STOREFILE_BLOOM_MAX_KEYS, 1000);
+
+ // this should not create a bloom because the max keys is too small
+ StoreFile.Writer writer = new StoreFile.Writer(fs, f,
+ StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+ conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
+ assertFalse(writer.hasBloom());
+ writer.close();
+ fs.delete(f, true);
+
+ conf.setInt(StoreFile.IO_STOREFILE_BLOOM_MAX_KEYS, Integer.MAX_VALUE);
+ // the below config caused IllegalArgumentException in our production cluster
+ // however, the resulting byteSize is < MAX_INT, so this should work properly
+ writer = new StoreFile.Writer(fs, f,
+ StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+ conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 272446963);
+ assertTrue(writer.hasBloom());
+ bloomWriteRead(writer, fs);
+
+ // this, however, is too large and should not create a bloom
+ // because Java can't create a contiguous array > MAX_INT
+ writer = new StoreFile.Writer(fs, f,
+ StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
+ conf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, Integer.MAX_VALUE);
+ assertFalse(writer.hasBloom());
+ writer.close();
+ fs.delete(f, true);
+ }
+
public void testFlushTimeComparator() {
assertOrdering(StoreFile.Comparators.FLUSH_TIME,
mockStoreFile(true, 1000, -1, "/foo/123"),