You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:27:08 UTC
svn commit: r1181608 - in
/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver:
ScanQueryMatcher.java StoreFile.java
Author: nspiegelberg
Date: Tue Oct 11 02:27:07 2011
New Revision: 1181608
URL: http://svn.apache.org/viewvc?rev=1181608&view=rev
Log:
Avoid doing two ROWCOL Bloom filter lookups for single-column gets
Summary: We optimized multi-column scans using Bloom filters in D276188, but
this resulted in checking Bloom filters twice for single-row ("get") scans with
only one column: first, when selecting the set of storefiles to scan, and
second, when checking whether a seek should be done on each selected storefile
for the given column. Here, we are getting rid of the redundant check by not
using the multi-get Bloom filter optimization in case of a scan with only one
column.
Test Plan: Unit tests. HBaseTest on 5-node cluster. Restart one regionserver on
prod cluster with the new jar and check OpenTSDB stats.
Reviewed By: kannan
Reviewers: kannan, liyintang
CC: hbase@lists, kannan
Revert Plan: OK
Differential Revision: 281907
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1181608&r1=1181607&r2=1181608&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Oct 11 02:27:07 2011
@@ -97,7 +97,12 @@ public class ScanQueryMatcher {
// We can share the ExplicitColumnTracker, diff is we reset
// between rows, not between storefiles.
this.columns = new ExplicitColumnTracker(columns,maxVersions);
- exactColumnQuery = true;
+
+ // Set the "exact column query" flag to enable row-column Bloom filter
+ // optimization. We avoid checking row-column Bloom filters twice for
+ // single-column get queries, because they are already being checked
+ // in StoreFile.shouldSeek.
+ exactColumnQuery = !(scan.isGetScan() && columns.size() == 1);
}
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1181608&r1=1181607&r2=1181608&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Oct 11 02:27:07 2011
@@ -1069,10 +1069,24 @@ public class StoreFile {
}
}
+ /**
+ * Checks whether the given scan passes the Bloom filter (if present). Only
+ * checks Bloom filters for single-row or single-row-column scans. Bloom
+ * filter checking for multi-gets is implemented as part of the store
+ * scanner system (see {@link StoreFileScanner#seekExactly}) and uses
+ * the lower-level API {@link #passesBloomFilter(byte[], int, int, byte[],
+ * int, int)}.
+ *
+ * @param scan the scan specification. Used to determine the row, and to
+ * check whether this is a single-row ("get") scan.
+ * @param columns the set of columns. Only used for row-column Bloom
+ * filters.
+ * @return true if the scan with the given column set passes the Bloom
+ * filter, or if the Bloom filter is not applicable for the scan.
+ * False if the Bloom filter is applicable and the scan fails it.
+ */
private boolean passesBloomFilter(Scan scan,
final SortedSet<byte[]> columns) {
- // Multi-column non-get scans will use Bloom filters through the
- // lower-level API function that this function calls.
if (!scan.isGetScan())
return true;