You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2015/04/16 23:56:19 UTC

hbase git commit: HBASE-13291 Making methods under ScannerV2#next inlineable and faster

Repository: hbase
Updated Branches:
  refs/heads/branch-1 3ca1e46bd -> 8166142b2


HBASE-13291 Making methods under ScannerV2#next inlineable and faster


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8166142b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8166142b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8166142b

Branch: refs/heads/branch-1
Commit: 8166142b2e815fc6ab30c14a5a546cd242bf3b4c
Parents: 3ca1e46
Author: stack <st...@apache.org>
Authored: Thu Apr 16 14:56:08 2015 -0700
Committer: stack <st...@apache.org>
Committed: Thu Apr 16 14:56:08 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/Bytes.java     |   7 +
 .../hadoop/hbase/io/hfile/HFileReaderV2.java    | 181 +++++++++++++------
 .../hadoop/hbase/io/hfile/HFileReaderV3.java    |  57 +++---
 .../hadoop/hbase/regionserver/HStore.java       |   4 +-
 .../hbase/regionserver/RSRpcServices.java       |   5 +-
 .../hadoop/hbase/regionserver/TestTags.java     |   2 +-
 6 files changed, 175 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8166142b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index cf6a9e1..a3ebc63 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -53,6 +53,7 @@ import sun.misc.Unsafe;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
+
 import org.apache.hadoop.hbase.util.Bytes.LexicographicalComparerHolder.UnsafeComparer;
 
 /**
@@ -60,6 +61,7 @@ import org.apache.hadoop.hbase.util.Bytes.LexicographicalComparerHolder.UnsafeCo
  * comparisons, hash code generation, manufacturing keys for HashMaps or
  * HashSets, etc.
  */
+@SuppressWarnings("restriction")
 @InterfaceAudience.Public
 @InterfaceStability.Stable
 public class Bytes {
@@ -116,6 +118,11 @@ public class Bytes {
    */
   public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
 
+  /**
+   * Mask to apply to a long to reveal the lower int only. Use like this:
+   * int i = (int)(0xFFFFFFFF00000000l ^ some_long_value);
+   */
+  public static final long MASK_FOR_LOWER_INT_IN_LONG = 0xFFFFFFFF00000000l;
 
   /**
    * Estimate of size cost to pay beyond payload in jvm for instance of byte [].

http://git-wip-us.apache.org/repos/asf/hbase/blob/8166142b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
index 833f851..b30d5c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
-import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.IdLock;
 import org.apache.hadoop.io.WritableUtils;
@@ -70,14 +69,15 @@ public class HFileReaderV2 extends AbstractHFileReader {
    */
   public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
 
-  protected boolean includesMemstoreTS = false;
+  private boolean includesMemstoreTS = false;
   protected boolean decodeMemstoreTS = false;
+
   protected boolean shouldIncludeMemstoreTS() {
     return includesMemstoreTS;
   }
 
   /** Filesystem-level block reader. */
-  protected HFileBlock.FSReader fsBlockReader;
+  private HFileBlock.FSReader fsBlockReader;
 
   /**
    * A "sparse lock" implementation allowing to lock on a particular block
@@ -104,7 +104,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
   /** Minor versions starting with this number have faked index key */
   static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
 
-  protected HFileContext hfileContext;
+  HFileContext hfileContext;
 
   /**
    * Opens a HFile. You must load the index before you can use it by calling
@@ -158,7 +158,8 @@ public class HFileReaderV2 extends AbstractHFileReader {
     fileInfo = new FileInfo();
     fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     byte[] creationTimeBytes = fileInfo.get(FileInfo.CREATE_TIME_TS);
-    this.hfileContext.setFileCreateTime(creationTimeBytes == null ? 0 : Bytes.toLong(creationTimeBytes));
+    this.hfileContext.setFileCreateTime(creationTimeBytes == null? 0:
+      Bytes.toLong(creationTimeBytes));
     lastKey = fileInfo.get(FileInfo.LASTKEY);
     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
@@ -792,16 +793,9 @@ public class HFileReaderV2 extends AbstractHFileReader {
     }
 
     /**
-     * Go to the next key/value in the block section. Loads the next block if
-     * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
-     * be called.
-     *
-     * @return true if successfully navigated to the next key/value
+     * Set the position on current backing blockBuffer.
      */
-    @Override
-    public boolean next() throws IOException {
-      assertSeeked();
-
+    private void positionThisBlockBuffer() {
       try {
         blockBuffer.position(getNextCellStartPosition());
       } catch (IllegalArgumentException e) {
@@ -812,32 +806,60 @@ public class HFileReaderV2 extends AbstractHFileReader {
             + "; currBlock currBlockOffset = " + block.getOffset());
         throw e;
       }
+    }
 
-      if (blockBuffer.remaining() <= 0) {
-        long lastDataBlockOffset =
-            reader.getTrailer().getLastDataBlockOffset();
-
-        if (block.getOffset() >= lastDataBlockOffset) {
-          setNonSeekedState();
-          return false;
-        }
-
-        // read the next block
-        HFileBlock nextBlock = readNextDataBlock();
-        if (nextBlock == null) {
-          setNonSeekedState();
-          return false;
-        }
+    /**
+     * Set our selves up for the next 'next' invocation, set up next block.
+     * @return True is more to read else false if at the end.
+     * @throws IOException
+     */
+    private boolean positionForNextBlock() throws IOException {
+      // Methods are small so they get inlined because they are 'hot'.
+      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
+      if (block.getOffset() >= lastDataBlockOffset) {
+        setNonSeekedState();
+        return false;
+      }
+      return isNextBlock();
+    }
 
-        updateCurrBlock(nextBlock);
-        return true;
+    private boolean isNextBlock() throws IOException {
+      // Methods are small so they get inlined because they are 'hot'.
+      HFileBlock nextBlock = readNextDataBlock();
+      if (nextBlock == null) {
+        setNonSeekedState();
+        return false;
       }
+      updateCurrBlock(nextBlock);
+      return true;
+    }
 
+    private final boolean _next() throws IOException {
+      // Small method so can be inlined. It is a hot one.
+      if (blockBuffer.remaining() <= 0) {
+        return positionForNextBlock();
+      }
       // We are still in the same block.
       readKeyValueLen();
       return true;
     }
 
+    /**
+     * Go to the next key/value in the block section. Loads the next block if
+     * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
+     * be called.
+     *
+     * @return true if successfully navigated to the next key/value
+     */
+    @Override
+    public boolean next() throws IOException {
+      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
+      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
+      assertSeeked();
+      positionThisBlockBuffer();
+      return _next();
+    }
+
     protected int getNextCellStartPosition() {
       return blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
           + currMemstoreTSLen;
@@ -918,34 +940,87 @@ public class HFileReaderV2 extends AbstractHFileReader {
       this.nextIndexedKey = null;
     }
 
+    /**
+     * @param v
+     * @return True if v < 0 or v > current block buffer limit.
+     */
+    protected final boolean checkLen(final int v) {
+      return v < 0 || v > this.blockBuffer.limit();
+    }
+
+    /**
+     * Check key and value lengths are wholesome.
+     */
+    protected final void checkKeyValueLen() {
+      if (checkLen(this.currKeyLen) || checkLen(this.currValueLen)) {
+        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen +
+          " or currValueLen " + this.currValueLen + ". Block offset: " + block.getOffset() +
+          ", block length: " + this.blockBuffer.limit() + ", position: " +
+           this.blockBuffer.position() + " (without header).");
+      }
+    }
+
     protected void readKeyValueLen() {
-      blockBuffer.mark();
-      currKeyLen = blockBuffer.getInt();
-      currValueLen = blockBuffer.getInt();
-      ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen);
-      readMvccVersion();
-      if (currKeyLen < 0 || currValueLen < 0
-          || currKeyLen > blockBuffer.limit()
-          || currValueLen > blockBuffer.limit()) {
-        throw new IllegalStateException("Invalid currKeyLen " + currKeyLen
-            + " or currValueLen " + currValueLen + ". Block offset: "
-            + block.getOffset() + ", block length: " + blockBuffer.limit()
-            + ", position: " + blockBuffer.position() + " (without header).");
+      // TODO: METHOD (mostly) DUPLICATED IN V3!!!!! FIXED in master branch by collapsing v3 and v2.
+      // This is a hot method. We go out of our way to make this method short so it can be
+      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
+      // because it is faster than going via range-checked ByteBuffer methods or going through a
+      // byte buffer array a byte at a time.
+      int p = blockBuffer.position() + blockBuffer.arrayOffset();
+      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
+      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
+      long ll = Bytes.toLong(blockBuffer.array(), p);
+      // Read top half as an int of key length and bottom int as value length
+      this.currKeyLen = (int)(ll >> Integer.SIZE);
+      this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
+      checkKeyValueLen();
+      // Move position past the key and value lengths and then beyond the key and value
+      p += (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
+      readMvccVersion(p);
+    }
+
+    /**
+     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
+     * @param position
+     */
+    protected void readMvccVersion(final int position) {
+      // See if we even need to decode mvcc.
+      if (!this.reader.shouldIncludeMemstoreTS()) return;
+      if (!this.reader.decodeMemstoreTS) {
+        currMemstoreTS = 0;
+        currMemstoreTSLen = 1;
+        return;
       }
-      blockBuffer.reset();
+      _readMvccVersion(position);
     }
 
-    protected void readMvccVersion() {
-      if (this.reader.shouldIncludeMemstoreTS()) {
-        if (this.reader.decodeMemstoreTS) {
-          currMemstoreTS = Bytes.readAsVLong(blockBuffer.array(), blockBuffer.arrayOffset()
-              + blockBuffer.position());
-          currMemstoreTSLen = WritableUtils.getVIntSize(currMemstoreTS);
-        } else {
-          currMemstoreTS = 0;
-          currMemstoreTSLen = 1;
+    /**
+     * Actually do the mvcc read. Does no checks.
+     * @param position
+     */
+    private void _readMvccVersion(final int position) {
+      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
+      // previous if one-byte vint, we'd redo the vint call to find int size.
+      // Also the method is kept small so can be inlined.
+      byte firstByte = blockBuffer.array()[position];
+      int len = WritableUtils.decodeVIntSize(firstByte);
+      if (len == 1) {
+        this.currMemstoreTS = firstByte;
+      } else {
+        long i = 0;
+        for (int idx = 0; idx < len - 1; idx++) {
+          byte b = blockBuffer.array()[position + 1 + idx];
+          i = i << 8;
+          i = i | (b & 0xFF);
         }
+        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
       }
+      this.currMemstoreTSLen = len;
+    }
+
+    protected void readMvccVersion() {
+      // TODO CLEANUP!!!
+      readMvccVersion(blockBuffer.arrayOffset() + blockBuffer.position());
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/8166142b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
index 13a8aef..f881ff5a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
@@ -69,7 +69,8 @@ public class HFileReaderV3 extends HFileReaderV2 {
    * @param conf
    *          Configuration
    */
-  public HFileReaderV3(final Path path, FixedFileTrailer trailer, final FSDataInputStreamWrapper fsdis,
+  public HFileReaderV3(final Path path, FixedFileTrailer trailer,
+      final FSDataInputStreamWrapper fsdis,
       final long size, final CacheConfig cacheConf, final HFileSystem hfs,
       final Configuration conf) throws IOException {
     super(path, trailer, fsdis, size, cacheConf, hfs, conf);
@@ -89,7 +90,7 @@ public class HFileReaderV3 extends HFileReaderV2 {
       HFileSystem hfs, Path path, FixedFileTrailer trailer) throws IOException {
     trailer.expectMajorVersion(3);
     HFileContextBuilder builder = new HFileContextBuilder()
-      .withIncludesMvcc(this.includesMemstoreTS)
+      .withIncludesMvcc(shouldIncludeMemstoreTS())
       .withHBaseCheckSum(true)
       .withCompression(this.compressAlgo);
 
@@ -203,30 +204,37 @@ public class HFileReaderV3 extends HFileReaderV2 {
       return nextKvPos;
     }
 
-    protected void readKeyValueLen() {
-      blockBuffer.mark();
-      currKeyLen = blockBuffer.getInt();
-      currValueLen = blockBuffer.getInt();
-      if (currKeyLen < 0 || currValueLen < 0 || currKeyLen > blockBuffer.limit()
-          || currValueLen > blockBuffer.limit()) {
-        throw new IllegalStateException("Invalid currKeyLen " + currKeyLen + " or currValueLen "
-            + currValueLen + ". Block offset: "
-            + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
-            + blockBuffer.position() + " (without header).");
+    private final void checkTagsLen() {
+      if (checkLen(this.currTagsLen)) {
+        throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen +
+          ". Block offset: " + block.getOffset() + ", block length: " + this.blockBuffer.limit() +
+          ", position: " + this.blockBuffer.position() + " (without header).");
       }
-      ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen);
+    }
+
+    protected final void readKeyValueLen() {
+      // TODO: METHOD (mostly) DUPLICATED IN V2!!!! FIXED in master branch by collapsing v3 and v2.
+      // This is a hot method. We go out of our way to make this method short so it can be
+      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
+      // because it is faster than going via range-checked ByteBuffer methods or going through a
+      // byte buffer array a byte at a time.
+      int p = blockBuffer.position() + blockBuffer.arrayOffset();
+      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
+      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
+      long ll = Bytes.toLong(blockBuffer.array(), p);
+      // Read top half as an int of key length and bottom int as value length
+      this.currKeyLen = (int)(ll >> Integer.SIZE);
+      this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
+      checkKeyValueLen();
+      // Move position past the key and value lengths and then beyond the key and value
+      p += (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
       if (reader.hfileContext.isIncludesTags()) {
-        // Read short as unsigned, high byte first
-        currTagsLen = ((blockBuffer.get() & 0xff) << 8) ^ (blockBuffer.get() & 0xff);
-        if (currTagsLen < 0 || currTagsLen > blockBuffer.limit()) {
-          throw new IllegalStateException("Invalid currTagsLen " + currTagsLen + ". Block offset: "
-              + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
-              + blockBuffer.position() + " (without header).");
-        }
-        ByteBufferUtils.skip(blockBuffer, currTagsLen);
+        // Tags length is a short.
+        this.currTagsLen = Bytes.toShort(blockBuffer.array(), p);
+        checkTagsLen();
+        p += (Bytes.SIZEOF_SHORT + currTagsLen);
       }
-      readMvccVersion();
-      blockBuffer.reset();
+      readMvccVersion(p);
     }
 
     /**
@@ -284,7 +292,8 @@ public class HFileReaderV3 extends HFileReaderV2 {
           }
         }
         blockBuffer.reset();
-        int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2);
+        int keyOffset =
+          blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2);
         keyOnlyKv.setKey(blockBuffer.array(), keyOffset, klen);
         int comp = reader.getComparator().compareOnlyKeyPortion(key, keyOnlyKv);
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/8166142b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 67c261c..c647301 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1757,8 +1757,8 @@ public class HStore implements Store {
    * @return true if the cell is expired
    */
   static boolean isCellTTLExpired(final Cell cell, final long oldestTimestamp, final long now) {
-    // Do not create an Iterator or Tag objects unless the cell actually has
-    // tags
+    // Do not create an Iterator or Tag objects unless the cell actually has tags.
+    // TODO: This check for tags is really expensive. We decode an int for key and value. Costs.
     if (cell.getTagsLength() > 0) {
       // Look for a TTL tag first. Use it instead of the family setting if
       // found. If a cell has multiple TTLs, resolve the conflict by using the

http://git-wip-us.apache.org/repos/asf/hbase/blob/8166142b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 1c9a22a..7949198 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -2249,7 +2249,10 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
             if (maxResultSize <= 0) {
               maxResultSize = maxQuotaResultSize;
             }
-            List<Cell> values = new ArrayList<Cell>();
+            // This is cells inside a row. Default size is 10 so if many versions or many cfs,
+            // then we'll resize. Resizings show in profiler. Set it higher than 10. For now
+            // arbitrary 32. TODO: keep record of general size of results being returned.
+            List<Cell> values = new ArrayList<Cell>(32);
             region.startRegionOperation(Operation.SCAN);
             try {
               int i = 0;

http://git-wip-us.apache.org/repos/asf/hbase/blob/8166142b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java
index 581b987..94e2028 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java
@@ -278,7 +278,7 @@ public class TestTags {
         Put put = new Put(row);
         byte[] value = Bytes.toBytes("value");
         put.add(fam, qual, HConstants.LATEST_TIMESTAMP, value);
-        int bigTagLen = Short.MAX_VALUE + 5;
+        int bigTagLen = Short.MAX_VALUE - 5;
         put.setAttribute("visibility", new byte[bigTagLen]);
         table.put(put);
         Put put1 = new Put(row1);