You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:27:08 UTC
svn commit: r1181608 - in /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver: ScanQueryMatcher.java StoreFile.java

Author: nspiegelberg
Date: Tue Oct 11 02:27:07 2011
New Revision: 1181608

URL: http://svn.apache.org/viewvc?rev=1181608&view=rev
Log:
Avoid doing two ROWCOL Bloom filter lookups for single-column gets

Summary: We optimized multi-column scans using Bloom filters in D276188, but
this resulted in checking Bloom filters twice for single-row ("get") scans with
only one column: first, when selecting the set of storefiles to scan, and
second, when checking whether a seek should be done on each selected storefile
for the given column. Here, we are getting rid of the redundant check by not
using the multi-get Bloom filter optimization in case of a scan with only one
column.
Test Plan: Unit tests. HBaseTest on 5-node cluster. Restart one regionserver on
prod cluster with the new jar and check OpenTSDB stats.
Reviewed By: kannan
Reviewers: kannan, liyintang
CC: hbase@lists, kannan
Revert Plan: OK
Differential Revision: 281907

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1181608&r1=1181607&r2=1181608&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Oct 11 02:27:07 2011
@@ -97,7 +97,12 @@ public class ScanQueryMatcher {
       // We can share the ExplicitColumnTracker, diff is we reset
       // between rows, not between storefiles.
       this.columns = new ExplicitColumnTracker(columns,maxVersions);
-      exactColumnQuery = true;
+
+      // Set the "exact column query" flag to enable row-column Bloom filter
+      // optimization. We avoid checking row-column Bloom filters twice for
+      // single-column get queries, because they are already being checked
+      // in StoreFile.shouldSeek.
+      exactColumnQuery = !(scan.isGetScan() && columns.size() == 1);
     }
   }
 

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181608&r1=1181607&r2=1181608&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:27:07 2011
@@ -1069,10 +1069,24 @@ public class StoreFile {
       }
     }
 
+    /**
+     * Checks whether the given scan passes the Bloom filter (if present). Only
+     * checks Bloom filters for single-row or single-row-column scans. Bloom
+     * filter checking for multi-gets is implemented as part of the store
+     * scanner system (see {@link StoreFileScanner#seekExactly}) and uses
+     * the lower-level API {@link #passesBloomFilter(byte[], int, int, byte[],
+     * int, int)}.
+     *
+     * @param scan the scan specification. Used to determine the row, and to
+     *          check whether this is a single-row ("get") scan.
+     * @param columns the set of columns. Only used for row-column Bloom
+     *          filters.
+     * @return true if the scan with the given column set passes the Bloom
+     *         filter, or if the Bloom filter is not applicable for the scan.
+     *         False if the Bloom filter is applicable and the scan fails it.
+     */
     private boolean passesBloomFilter(Scan scan,
         final SortedSet<byte[]> columns) {
-      // Multi-column non-get scans will use Bloom filters through the
-      // lower-level API function that this function calls.
       if (!scan.isGetScan())
         return true;