You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:00:37 UTC
svn commit: r1181346 - in /hbase/branches/0.89/src:
main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
Author: nspiegelberg
Date: Tue Oct 11 02:00:36 2011
New Revision: 1181346
URL: http://svn.apache.org/viewvc?rev=1181346&view=rev
Log:
Entire Row Deletes not stored in Row+Col Bloom
Summary:
Applying changes for fixing bug HBASE-2823 from open-source repository to
internal one.
Summary: When a Delete Row is issued on a row in a family that uses a row+col
bloom filter, some of the columns might not be deleted. Since a Delete Row is
just a Delete Family applied to all columns, a file that holds the delete but
does not contain the column we are searching for might be skipped, leaving that
column unaffected. In order to ensure the file is included, the row key is
added to the bloom together with the row+col key. shouldSeek() then checks both
row and row+col if the bloom is row+col (BloomType.ROWCOL). The extra check adds
false positives, which is compensated for by dividing the error rate the user
requires by two.
Test Plan:
Added a new unit test to check this.
DiffCamp Revision: 141218
Reviewed By: nspiegelberg
CC: nspiegelberg, kannan
Revert Plan:
OK
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181346&r1=1181345&r2=1181346&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:00:36 2011
@@ -694,6 +694,13 @@ public class StoreFile {
if (bloomType != BloomType.NONE && conf != null) {
float err = conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float)0.01);
+ // Since in row+col blooms we have 2 calls to shouldSeek() instead of 1
+ // and the false positives are adding up, we should keep the error rate
+ // twice as low in order to maintain the number of false positives as
+ // desired by the user
+ if (bloomType == BloomType.ROWCOL) {
+ err /= 2;
+ }
int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, 7);
this.bloomFilter = new ByteBloomFilter(maxKeys, err,
@@ -800,7 +807,6 @@ public class StoreFile {
byte [] result = new byte[rl + ql];
System.arraycopy(kv.getBuffer(), ro, result, 0, rl);
System.arraycopy(kv.getBuffer(), qo, result, rl, ql);
-
this.bloomFilter.add(result);
break;
default:
@@ -943,7 +949,17 @@ public class StoreFile {
try {
ByteBuffer bloom = reader.getMetaBlock(BLOOM_FILTER_DATA_KEY, true);
if (bloom != null) {
- return this.bloomFilter.contains(key, bloom);
+ if (this.bloomFilterType == BloomType.ROWCOL) {
+ // Since a Row Delete is essentially a DeleteFamily applied to all
+ // columns, a file might be skipped if using row+col Bloom filter.
+ // In order to ensure this file is included an additional check is
+ // required looking only for a row bloom.
+ return this.bloomFilter.contains(key, bloom) ||
+ this.bloomFilter.contains(row, bloom);
+ }
+ else {
+ return this.bloomFilter.contains(key, bloom);
+ }
}
} catch (IOException e) {
LOG.error("Error reading bloom filter data -- proceeding without",
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java?rev=1181346&r1=1181345&r2=1181346&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java Tue Oct 11 02:00:36 2011
@@ -2702,6 +2702,47 @@ public class TestHRegion extends HBaseTe
checkOneCell(kvs[3], FAMILY, 0, 0, 1);
}
+ /**
+ * Regression test for HBASE-2823: a whole-row delete must still hide a
+ * cell when the family's bloom filter is row+col (BloomType.ROWCOL).
+ * Puts one cell, flushes, deletes the row, flushes, then expects an empty Get.
+ */
+ public void testDeleteRowWithBloomFilter() throws IOException {
+ byte [] tableName = Bytes.toBytes("testDeleteRowWithBloomFilter");
+ byte [] familyName = Bytes.toBytes("familyName");
+
+ // Create a region whose single family is declared with the "rowcol" bloom type
+ HColumnDescriptor hcd = new HColumnDescriptor(familyName, Integer.MAX_VALUE,
+ HColumnDescriptor.DEFAULT_COMPRESSION, false, true,
+ HColumnDescriptor.DEFAULT_TTL, "rowcol");
+
+ HTableDescriptor htd = new HTableDescriptor(tableName);
+ htd.addFamily(hcd);
+ HRegionInfo info = new HRegionInfo(htd, null, null, false);
+ Path path = new Path(DIR + "TestDeleteRowWithBloomFilter");
+ region = HRegion.createHRegion(info, path, conf);
+
+ // Write exactly one cell (row1/col1) and flush it before the delete is issued
+ byte row[] = Bytes.toBytes("row1");
+ byte col[] = Bytes.toBytes("col1");
+
+ Put put = new Put(row);
+ put.add(familyName, col, 1, Bytes.toBytes("SomeRandomValue"));
+ region.put(put);
+ region.flushcache();
+
+ Delete del = new Delete(row); // no family/column specified: targets the whole row
+ region.delete(del, null, true);
+ region.flushcache(); // second flush, issued after the delete
+
+ // Read back the same row+col that was written; the row delete must hide it
+ Get get = new Get(row);
+ get.addColumn(familyName, col);
+
+ KeyValue[] keyValues = region.get(get, null).raw();
+ assertTrue(keyValues.length == 0);
+ }
+
private void putData(int startRow, int numRows, byte [] qf,
byte [] ...families)
throws IOException {