You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ji...@apache.org on 2008/07/15 21:01:55 UTC
svn commit: r677008 -
/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
Author: jimk
Date: Tue Jul 15 12:01:55 2008
New Revision: 677008
URL: http://svn.apache.org/viewvc?rev=677008&view=rev
Log:
HBASE-744 BloomFilter serialization/deserialization broken
Modified:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java?rev=677008&r1=677007&r2=677008&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java Tue Jul 15 12:01:55 2008
@@ -723,9 +723,9 @@
BloomFilter filter = new BloomFilter();
FSDataInputStream in = fs.open(filterFile);
try {
- bloomFilter.readFields(in);
+ filter.readFields(in);
} finally {
- fs.close();
+ in.close();
}
return filter;
}
@@ -817,12 +817,15 @@
*
* the probability of false positives is minimized when k is
* approximately (m/n) * ln(2).
+ *
+ * If we fix the number of hash functions and know the number of
+ * entries, then the optimal vector size m = (k * n) / ln(2)
*/
this.bloomFilter = new BloomFilter(
- (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
(int) Math.ceil(
(DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
- Math.log(2.0))
+ Math.log(2.0)),
+ (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS
);
} else {
this.bloomFilter = null;