You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:00:37 UTC

svn commit: r1181346 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java

Author: nspiegelberg
Date: Tue Oct 11 02:00:36 2011
New Revision: 1181346

URL: http://svn.apache.org/viewvc?rev=1181346&view=rev
Log:
Entire Row Deletes not stored in Row+Col Bloom

Summary:
Applies the fix for bug HBASE-2823 from the open-source repository to the
internal one.

Summary: When a Delete Row is issued on a row that uses a row+col bloom filter,
some of the columns might not be deleted. Since a Delete Row is just a Delete
Family applied to all columns, a file that doesn't contain the column we are
searching for might be skipped, and the column might end up unaffected. In order
to ensure the file will be included, the row is added to the bloom together with
row+col. Then shouldSeek() checks both row and row+col if the bloom is row+col
(BloomType.ROWCOL). That adds additional false positives, which are accounted
for by dividing the error rate the user requires by two.

Test Plan:
Added a new unit test to check this.

DiffCamp Revision: 141218
Reviewed By: nspiegelberg
CC: nspiegelberg, kannan
Revert Plan:
OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181346&r1=1181345&r2=1181346&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:00:36 2011
@@ -694,6 +694,13 @@ public class StoreFile {
 
       if (bloomType != BloomType.NONE && conf != null) {
         float err = conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float)0.01);
+        // Since in row+col blooms we have 2 calls to shouldSeek() instead of 1
+        // and the false positives are adding up, we should keep the error rate
+        // twice as low in order to maintain the number of false positives as
+        // desired by the user
+        if (bloomType == BloomType.ROWCOL) {
+          err /= 2;
+        }
         int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD, 7);
 
         this.bloomFilter = new ByteBloomFilter(maxKeys, err,
@@ -800,7 +807,6 @@ public class StoreFile {
             byte [] result = new byte[rl + ql];
             System.arraycopy(kv.getBuffer(), ro, result, 0,  rl);
             System.arraycopy(kv.getBuffer(), qo, result, rl, ql);
-
             this.bloomFilter.add(result);
             break;
           default:
@@ -943,7 +949,17 @@ public class StoreFile {
       try {
         ByteBuffer bloom = reader.getMetaBlock(BLOOM_FILTER_DATA_KEY, true);
         if (bloom != null) {
-          return this.bloomFilter.contains(key, bloom);
+          if (this.bloomFilterType == BloomType.ROWCOL) {
+            // Since a Row Delete is essentially a DeleteFamily applied to all
+            // columns, a file might be skipped if using row+col Bloom filter.
+            // In order to ensure this file is included an additional check is
+            // required looking only for a row bloom.
+            return this.bloomFilter.contains(key, bloom) ||
+                this.bloomFilter.contains(row, bloom);
+          }
+          else {
+            return this.bloomFilter.contains(key, bloom);
+          }
         }
       } catch (IOException e) {
         LOG.error("Error reading bloom filter data -- proceeding without",

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java?rev=1181346&r1=1181345&r2=1181346&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java Tue Oct 11 02:00:36 2011
@@ -2702,6 +2702,47 @@ public class TestHRegion extends HBaseTe
     checkOneCell(kvs[3], FAMILY, 0, 0, 1);
   }
 
+  /**
+    * Testcase to cover bug-fix for HBASE-2823
+    * Ensures correct delete when issuing delete row
+    * on columns with bloom filter set to row+col (BloomType.ROWCOL)
+   */
+  public void testDeleteRowWithBloomFilter() throws IOException {
+    byte [] tableName = Bytes.toBytes("testDeleteRowWithBloomFilter");
+    byte [] familyName = Bytes.toBytes("familyName");
+
+    // Create Table
+    HColumnDescriptor hcd = new HColumnDescriptor(familyName, Integer.MAX_VALUE,
+        HColumnDescriptor.DEFAULT_COMPRESSION, false, true,
+        HColumnDescriptor.DEFAULT_TTL, "rowcol");
+
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    htd.addFamily(hcd);
+    HRegionInfo info = new HRegionInfo(htd, null, null, false);
+    Path path = new Path(DIR + "TestDeleteRowWithBloomFilter");
+    region = HRegion.createHRegion(info, path, conf);
+
+    // Insert some data
+    byte row[] = Bytes.toBytes("row1");
+    byte col[] = Bytes.toBytes("col1");
+
+    Put put = new Put(row);
+    put.add(familyName, col, 1, Bytes.toBytes("SomeRandomValue"));
+    region.put(put);
+    region.flushcache();
+
+    Delete del = new Delete(row);
+    region.delete(del, null, true);
+    region.flushcache();
+
+    // Get remaining rows (should have none)
+    Get get = new Get(row);
+    get.addColumn(familyName, col);
+
+    KeyValue[] keyValues = region.get(get, null).raw();
+    assertTrue(keyValues.length == 0);
+  }
+
   private void putData(int startRow, int numRows, byte [] qf,
       byte [] ...families)
   throws IOException {