You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:04:34 UTC

svn commit: r1181390 - in /hbase/branches/0.89: ./ src/main/java/org/apache/hadoop/hbase/ src/main/java/org/apache/hadoop/hbase/regionserver/

Author: nspiegelberg
Date: Tue Oct 11 02:04:34 2011
New Revision: 1181390

URL: http://svn.apache.org/viewvc?rev=1181390&view=rev
Log:
Import HBase-2450

Summary:
See https://issues.apache.org/jira/browse/HBASE-2450

Test Plan:
manual test

DiffCamp Revision: 163497
Reviewed By: kannan
CC: kannan, hbase@lists
Tasks:
#396683: hbase-2450: column seeking optimization

Revert Plan:
OK

Modified:
    hbase/branches/0.89/CHANGES.txt
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java

Modified: hbase/branches/0.89/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/CHANGES.txt?rev=1181390&r1=1181389&r2=1181390&view=diff
==============================================================================
--- hbase/branches/0.89/CHANGES.txt (original)
+++ hbase/branches/0.89/CHANGES.txt Tue Oct 11 02:04:34 2011
@@ -837,6 +837,9 @@ Release 0.21.0 - Unreleased
   OPTIMIZATIONS
    HBASE-410   [testing] Speed up the test suite
    HBASE-2041  Change WAL default configuration values
+   HBASE-2450  For single row reads of specific columns, seek to the
+               first column in HFiles rather than start of row
+               (Pranav via Ryan, some Ryan)
 
 
 Release 0.20.0 - Tue Sep  8 12:53:05 PDT 2009

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1181390&r1=1181389&r2=1181390&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java Tue Oct 11 02:04:34 2011
@@ -254,6 +254,11 @@ public final class HConstants {
   public static final long LATEST_TIMESTAMP = Long.MAX_VALUE;
 
   /**
+   * Timestamp to use when we want to refer to the oldest cell.
+   */
+  public static final long OLDEST_TIMESTAMP = Long.MIN_VALUE;
+
+  /**
    * LATEST_TIMESTAMP in bytes form
    */
   public static final byte [] LATEST_TIMESTAMP_BYTES = Bytes.toBytes(LATEST_TIMESTAMP);

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java?rev=1181390&r1=1181389&r2=1181390&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/KeyValue.java Tue Oct 11 02:04:34 2011
@@ -1632,6 +1632,31 @@ public class KeyValue implements Writabl
   }
 
   /**
+   * Create a KeyValue for the specified row, family and qualifier that would be
+   * larger than or equal to all other possible KeyValues that have the same
+   * row, family, qualifier. The key carries HConstants.OLDEST_TIMESTAMP and
+   * Type.Minimum. Used for reseeking.
+   * @param row buffer containing the row key
+   * @param roffset offset of the row key within row
+   * @param rlength length of the row key
+   * @param family buffer containing the family name
+   * @param foffset offset of the family name within family
+   * @param flength length of the family name
+   * @param qualifier buffer containing the column qualifier
+   * @param qoffset offset of the qualifier within qualifier
+   * @param qlength length of the qualifier
+   * @return Last possible key on passed row, family, qualifier.
+   */
+  public static KeyValue createLastOnRow(final byte [] row,
+      final int roffset, final int rlength, final byte [] family,
+      final int foffset, final int flength, final byte [] qualifier,
+      final int qoffset, final int qlength) {
+    return new KeyValue(row, roffset, rlength, family,
+        foffset, flength, qualifier, qoffset, qlength,
+        HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
+  }
+
+  /**
    * @param b
    * @return A KeyValue made of a byte array that holds the key-only part.
    * Needed to convert hfile index members to KeyValues.

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java?rev=1181390&r1=1181389&r2=1181390&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java Tue Oct 11 02:04:34 2011
@@ -401,7 +401,7 @@ public class MemStore implements HeapSiz
           if (kv.getType() == KeyValue.Type.Put.getCode() &&
               kv.getMemstoreTS() == 0) {
             // false means there was a change, so give us the size.
-            addedSize -= heapSizeChange(kv, false);
+            addedSize -= heapSizeChange(kv, true);
 
             it.remove();
           }

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java?rev=1181390&r1=1181389&r2=1181390&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java Tue Oct 11 02:04:34 2011
@@ -159,16 +159,17 @@ public class ScanQueryMatcher {
         this.deletes.add(bytes, offset, qualLength, timestamp, type);
         // Can't early out now, because DelFam come before any other keys
       }
-      // May be able to optimize the SKIP here, if we matched
-      // due to a DelFam, we can skip to next row
-      // due to a DelCol, we can skip to next col
-      // But it requires more info out of isDelete().
-      // needful -> million column challenge.
       return MatchCode.SKIP;
     }
 
     if (!this.deletes.isEmpty() &&
         deletes.isDeleted(bytes, offset, qualLength, timestamp)) {
+
+      // May be able to optimize the SKIP here, if we matched
+      // due to a DelFam, we can skip to next row
+      // due to a DelCol, we can skip to next col
+      // But it requires more info out of isDeleted().
+      // needful -> million column challenge.
       return MatchCode.SKIP;
     }
 
@@ -233,6 +234,8 @@ public class ScanQueryMatcher {
     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
         rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
+      // KV >= STOPROW
+      // then NO there is nothing left.
       return false;
     } else {
       return true;
@@ -280,6 +283,28 @@ public class ScanQueryMatcher {
     }
   }
 
+  public KeyValue getKeyForNextColumn(KeyValue kv) { // key to reseek to once kv's column is done
+    ColumnCount nextColumn = columns.getColumnHint(); // null when no hint is available
+    if (nextColumn == null) { // no hint: use the last possible key of kv's own column
+      return KeyValue.createLastOnRow(
+          kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
+          kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
+          kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
+    } else { // hint: kv's row and family with the hinted qualifier
+      return KeyValue.createFirstOnRow( // presumably the first key of that column - TODO confirm
+          kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
+          kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
+          nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
+    }
+  }
+
+  public KeyValue getKeyForNextRow(KeyValue kv) { // key to reseek to when leaving kv's row
+    return KeyValue.createLastOnRow(
+        kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
+        null, 0, 0, // no family
+        null, 0, 0); // no qualifier; NOTE(review): assumes this sorts after the whole row - confirm
+  }
+
   /**
    * {@link #match} return codes.  These instruct the scanner moving through
    * memstores and StoreFiles what to do with the current KeyValue.

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=1181390&r1=1181389&r2=1181390&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Tue Oct 11 02:04:34 2011
@@ -68,7 +68,8 @@ class StoreScanner implements KeyValueSc
     // pass columns = try to filter out unnecessary ScanFiles
     List<KeyValueScanner> scanners = getScanners(scan, columns);
 
-    // Seek all scanners to the initial key
+    // Seek all scanners to the start of the Row (or if the exact matching row key does not
+    // exist, then to the start of the next matching Row).
     for(KeyValueScanner scanner : scanners) {
       scanner.seek(matcher.getStartKey());
     }
@@ -256,18 +257,18 @@ class StoreScanner implements KeyValueSc
           return false;
 
         case SEEK_NEXT_ROW:
+          // This is just a relatively simple end-of-scan fix: it short-circuits and ends the scan
+          // if there is an endKey in the scan.
           if (!matcher.moreRowsMayExistAfter(kv)) {
             outResult.addAll(results);
             return false;
           }
-          heap.next();
+
+          reseek(matcher.getKeyForNextRow(kv));
           break;
 
         case SEEK_NEXT_COL:
-          // TODO hfile needs 'hinted' seeking to prevent it from
-          // reseeking from the start of the block on every dang seek.
-          // We need that API and expose it the scanner chain.
-          heap.next();
+          reseek(matcher.getKeyForNextColumn(kv));
           break;
 
         case SKIP: