You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2014/03/11 19:34:56 UTC

svn commit: r1576462 - in /hbase/branches/0.96: hbase-client/src/main/java/org/apache/hadoop/hbase/client/ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/ src/main/docbkx/

Author: larsh
Date: Tue Mar 11 18:34:56 2014
New Revision: 1576462

URL: http://svn.apache.org/r1576462
Log:
HBASE-9778 Add hint to ExplicitColumnTracker to avoid seeking

Modified:
    hbase/branches/0.96/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java
    hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
    hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
    hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java
    hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java
    hbase/branches/0.96/src/main/docbkx/performance.xml

Modified: hbase/branches/0.96/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java?rev=1576462&r1=1576461&r2=1576462&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java (original)
+++ hbase/branches/0.96/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java Tue Mar 11 18:34:56 2014
@@ -85,6 +85,23 @@ public class Scan extends OperationWithA
   private static final String RAW_ATTR = "_raw_";
   private static final String ISOLATION_LEVEL = "_isolationlevel_";
 
+  /**
+   * EXPERT ONLY.
+   * An integer (not long) indicating to the scanner logic how many times we attempt to retrieve the
+   * next KV before we schedule a reseek.
+   * The right value depends on the size of the average KV. A reseek is more efficient when
+   * it can skip 5-10 KVs or 512B-1KB, or when the next KV is likely found in another HFile block.
+   * This setting only has an effect when columns were added with
+   * {@link #addColumn(byte[], byte[])}.
+   * <pre>{@code
+   * Scan s = new Scan(...);
+   * s.addColumn(...);
+   * s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
+   * }</pre>
+   * Default is 0 (always reseek).
+   */
+  public static final String HINT_LOOKAHEAD = "_look_ahead_";
+
   private byte [] startRow = HConstants.EMPTY_START_ROW;
   private byte [] stopRow  = HConstants.EMPTY_END_ROW;
   private int maxVersions = 1;

Modified: hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java?rev=1576462&r1=1576461&r2=1576462&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java (original)
+++ hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java Tue Mar 11 18:34:56 2014
@@ -56,6 +56,10 @@ public class ExplicitColumnTracker imple
   private final int maxVersions;
   private final int minVersions;
 
+  // Hint for the tracker: how many KVs we attempt to skip past via next()
+  // before we schedule a (re)seek operation.
+  private final int lookAhead; 
+
  /**
   * Contains the list of columns that the ExplicitColumnTracker is tracking.
   * Each ColumnCount instance also tracks how many versions of the requested
@@ -68,6 +72,7 @@ public class ExplicitColumnTracker imple
    * Used to eliminate duplicates. */
   private long latestTSOfCurrentColumn;
   private long oldestStamp;
+  private int skipCount;
 
   /**
    * Default constructor.
@@ -76,11 +81,14 @@ public class ExplicitColumnTracker imple
    * @param maxVersions maximum versions to return per column
    * @param oldestUnexpiredTS the oldest timestamp we are interested in,
    *  based on TTL 
+   * @param lookAhead number of KeyValues to look ahead via next before
+   *  (re)seeking
    */
   public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
-      int maxVersions, long oldestUnexpiredTS) {
+      int maxVersions, long oldestUnexpiredTS, int lookAhead) {
     this.maxVersions = maxVersions;
     this.minVersions = minVersions;
+    this.lookAhead = lookAhead;
     this.oldestStamp = oldestUnexpiredTS;
     this.columns = new ColumnCount[columns.size()];
     int i=0;
@@ -136,7 +144,8 @@ public class ExplicitColumnTracker imple
       if (ret > 0) {
         // The current KV is smaller than the column the ExplicitColumnTracker
         // is interested in, so seek to that column of interest.
-        return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
+        return this.skipCount++ < this.lookAhead ? ScanQueryMatcher.MatchCode.SKIP
+            : ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
       }
 
       // The current KV is bigger than the column the ExplicitColumnTracker
@@ -145,6 +154,7 @@ public class ExplicitColumnTracker imple
       // column of interest, and check again.
       if (ret <= -1) {
         ++this.index;
+        this.skipCount = 0;
         if (done()) {
           // No more to match, do not include, done with this row.
           return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
@@ -169,6 +179,7 @@ public class ExplicitColumnTracker imple
     if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
       // Done with versions for this column
       ++this.index;
+      this.skipCount = 0;
       resetTS();
       if (done()) {
         // We have served all the requested columns.
@@ -187,6 +198,7 @@ public class ExplicitColumnTracker imple
   // Called between every row.
   public void reset() {
     this.index = 0;
+    this.skipCount = 0;
     this.column = this.columns[this.index];
     for(ColumnCount col : this.columns) {
       col.setCount(0);
@@ -226,6 +238,7 @@ public class ExplicitColumnTracker imple
       resetTS();
       if (compare <= 0) {
         ++this.index;
+        this.skipCount = 0;
         if (done()) {
           // Will not hit any more columns in this storefile
           this.column = null;

Modified: hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1576462&r1=1576461&r2=1576462&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original)
+++ hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Mar 11 18:34:56 2014
@@ -183,8 +183,9 @@ public class ScanQueryMatcher {
 
       // We can share the ExplicitColumnTracker, diff is we reset
       // between rows, not between storefiles.
-      this.columns = new ExplicitColumnTracker(columns,
-          scanInfo.getMinVersions(), maxVersions, oldestUnexpiredTS);
+      byte[] attr = scan.getAttribute(Scan.HINT_LOOKAHEAD);
+      this.columns = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions,
+          oldestUnexpiredTS, attr == null ? 0 : Bytes.toInt(attr));
     }
   }
 

Modified: hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java?rev=1576462&r1=1576461&r2=1576462&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java (original)
+++ hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestExplicitColumnTracker.java Tue Mar 11 18:34:56 2014
@@ -44,9 +44,9 @@ public class TestExplicitColumnTracker e
   private void runTest(int maxVersions,
                        TreeSet<byte[]> trackColumns,
                        List<byte[]> scannerColumns,
-                       List<MatchCode> expected) throws IOException {
+                       List<MatchCode> expected, int lookAhead) throws IOException {
     ColumnTracker exp = new ExplicitColumnTracker(
-      trackColumns, 0, maxVersions, Long.MIN_VALUE);
+      trackColumns, 0, maxVersions, Long.MIN_VALUE, lookAhead);
 
 
     //Initialize result
@@ -95,7 +95,7 @@ public class TestExplicitColumnTracker e
     scanner.add(col4);
     scanner.add(col5);
 
-    runTest(maxVersions, columns, scanner, expected);
+    runTest(maxVersions, columns, scanner, expected, 0);
   }
 
   public void testGet_MultiVersion() throws IOException{
@@ -150,9 +150,63 @@ public class TestExplicitColumnTracker e
     scanner.add(col5);
 
     //Initialize result
-    runTest(maxVersions, columns, scanner, expected);
+    runTest(maxVersions, columns, scanner, expected, 0);
   }
 
+  public void testGet_MultiVersionWithLookAhead() throws IOException{
+    if(PRINT){
+      System.out.println("\nMultiVersion");
+    }
+
+    //Create tracker
+    TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+    //Looking for every other column (col2 and col4)
+    columns.add(col2);
+    columns.add(col4);
+
+    List<ScanQueryMatcher.MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
+    expected.add(ScanQueryMatcher.MatchCode.SKIP);
+    expected.add(ScanQueryMatcher.MatchCode.SKIP);
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
+
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE);                   // col2; 1st version
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL); // col2; 2nd version
+    expected.add(ScanQueryMatcher.MatchCode.SKIP);
+
+    expected.add(ScanQueryMatcher.MatchCode.SKIP);
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
+
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE);                   // col4; 1st version
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW); // col4; 2nd version
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
+
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
+    int maxVersions = 2;
+
+    //Create "Scanner"
+    List<byte[]> scanner = new ArrayList<byte[]>();
+    scanner.add(col1);
+    scanner.add(col1);
+    scanner.add(col1);
+    scanner.add(col2);
+    scanner.add(col2);
+    scanner.add(col2);
+    scanner.add(col3);
+    scanner.add(col3);
+    scanner.add(col3);
+    scanner.add(col4);
+    scanner.add(col4);
+    scanner.add(col4);
+    scanner.add(col5);
+    scanner.add(col5);
+    scanner.add(col5);
+
+    //Run the tracker with a lookAhead of 2 and compare against the expected codes
+    runTest(maxVersions, columns, scanner, expected, 2);
+  }
 
   /**
    * hbase-2259
@@ -165,7 +219,7 @@ public class TestExplicitColumnTracker e
     }
 
     ColumnTracker explicit = new ExplicitColumnTracker(columns, 0, maxVersions,
-        Long.MIN_VALUE);
+        Long.MIN_VALUE, 0);
     for (int i = 0; i < 100000; i+=2) {
       byte [] col = Bytes.toBytes("col"+i);
       ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
@@ -193,7 +247,7 @@ public class TestExplicitColumnTracker e
       new ScanQueryMatcher.MatchCode[] {
         ScanQueryMatcher.MatchCode.SEEK_NEXT_COL,
         ScanQueryMatcher.MatchCode.SEEK_NEXT_COL });
-    runTest(1, columns, scanner, expected);
+    runTest(1, columns, scanner, expected, 0);
   }
 
 }

Modified: hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java?rev=1576462&r1=1576461&r2=1576462&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java (original)
+++ hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java Tue Mar 11 18:34:56 2014
@@ -89,22 +89,8 @@ public class TestQueryMatcher extends HB
 
   }
 
-  public void testMatch_ExplicitColumns()
-  throws IOException {
-    //Moving up from the Tracker by using Gets and List<KeyValue> instead
-    //of just byte []
-
-    //Expected result
-    List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
-    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
-    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
-    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
-    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
-    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW);
-    expected.add(ScanQueryMatcher.MatchCode.DONE);
-
-    // 2,4,5
-    
+    private void _testMatch_ExplicitColumns(Scan scan, List<MatchCode> expected) throws IOException {
+    // 2,4,5    
     ScanQueryMatcher qm = new ScanQueryMatcher(scan, new ScanInfo(fam2,
         0, 1, ttl, false, 0, rowComparator), get.getFamilyMap().get(fam2),
         EnvironmentEdgeManager.currentTimeMillis() - ttl);
@@ -136,6 +122,42 @@ public class TestQueryMatcher extends HB
     }
   }
 
+  public void testMatch_ExplicitColumns()
+  throws IOException {
+    //Moving up from the Tracker by using Gets and List<KeyValue> instead
+    //of just byte []
+
+    //Expected result
+    List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW);
+    expected.add(ScanQueryMatcher.MatchCode.DONE);
+
+    _testMatch_ExplicitColumns(scan, expected);
+  }
+
+  public void testMatch_ExplicitColumnsWithLookAhead()
+  throws IOException {
+    //Moving up from the Tracker by using Gets and List<KeyValue> instead
+    //of just byte []
+
+    //Expected result
+    List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
+    expected.add(ScanQueryMatcher.MatchCode.SKIP);
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.SKIP);
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
+    expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW);
+    expected.add(ScanQueryMatcher.MatchCode.DONE);
+
+    Scan s = new Scan(scan);
+    s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
+    _testMatch_ExplicitColumns(s, expected);
+  }
+
 
   public void testMatch_Wildcard()
   throws IOException {

Modified: hbase/branches/0.96/src/main/docbkx/performance.xml
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/src/main/docbkx/performance.xml?rev=1576462&r1=1576461&r2=1576462&view=diff
==============================================================================
Binary files - no diff available.