You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2023/08/15 15:24:59 UTC

[hbase] branch branch-2.4 updated: HBASE-28012 Avoid CellUtil.cloneRow in BufferedEncodedSeeker (#5347)

This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 9aa8b9b6ae7 HBASE-28012 Avoid CellUtil.cloneRow in BufferedEncodedSeeker (#5347)
9aa8b9b6ae7 is described below

commit 9aa8b9b6ae71b318388214f4999a57d15e78092c
Author: jbewing <ew...@northeastern.edu>
AuthorDate: Tue Aug 15 10:28:13 2023 -0400

    HBASE-28012 Avoid CellUtil.cloneRow in BufferedEncodedSeeker (#5347)
    
    Signed-off-by: Duo Zhang <zh...@apache.org>
    (cherry picked from commit 2fb2ae152a2d8aedda154bb1cf0bc72bf6e6b09c)
---
 .../io/encoding/BufferedDataBlockEncoder.java      | 157 ++++++++++++++++++---
 .../apache/hadoop/hbase/util/ByteBufferUtils.java  |  24 ++++
 .../hbase/io/encoding/TestDataBlockEncoders.java   |  65 +++++++++
 3 files changed, 223 insertions(+), 23 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
index 15a98663588..eba77013d4a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
@@ -88,21 +88,100 @@ abstract class BufferedDataBlockEncoder extends AbstractDataBlockEncoder {
   // Having this as static is fine but if META is having DBE then we should
   // change this.
   public static int compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) {
-    return Bytes.compareTo(left.getRowArray(), left.getRowOffset() + rowCommonPrefix,
-      left.getRowLength() - rowCommonPrefix, right.getRowArray(),
-      right.getRowOffset() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix);
+    if (left instanceof ByteBufferExtendedCell) {
+      ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
+      if (right instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
+        return ByteBufferUtils.compareTo(bbLeft.getRowByteBuffer(),
+          bbLeft.getRowPosition() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix,
+          bbRight.getRowByteBuffer(), bbRight.getRowPosition() + rowCommonPrefix,
+          right.getRowLength() - rowCommonPrefix);
+      } else {
+        return ByteBufferUtils.compareTo(bbLeft.getRowByteBuffer(),
+          bbLeft.getRowPosition() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix,
+          right.getRowArray(), right.getRowOffset() + rowCommonPrefix,
+          right.getRowLength() - rowCommonPrefix);
+      }
+    } else {
+      if (right instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
+        return ByteBufferUtils.compareTo(left.getRowArray(), left.getRowOffset() + rowCommonPrefix,
+          left.getRowLength() - rowCommonPrefix, bbRight.getRowByteBuffer(),
+          bbRight.getRowPosition() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix);
+      } else {
+        return Bytes.compareTo(left.getRowArray(), left.getRowOffset() + rowCommonPrefix,
+          left.getRowLength() - rowCommonPrefix, right.getRowArray(),
+          right.getRowOffset() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix);
+      }
+    }
   }
 
   public static int compareCommonFamilyPrefix(Cell left, Cell right, int familyCommonPrefix) {
-    return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix,
-      left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
-      right.getFamilyOffset() + familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix);
+    if (left instanceof ByteBufferExtendedCell) {
+      ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
+      if (right instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
+        return ByteBufferUtils.compareTo(bbLeft.getFamilyByteBuffer(),
+          bbLeft.getFamilyPosition() + familyCommonPrefix,
+          left.getFamilyLength() - familyCommonPrefix, bbRight.getFamilyByteBuffer(),
+          bbRight.getFamilyPosition() + familyCommonPrefix,
+          right.getFamilyLength() - familyCommonPrefix);
+      } else {
+        return ByteBufferUtils.compareTo(bbLeft.getFamilyByteBuffer(),
+          bbLeft.getFamilyPosition() + familyCommonPrefix,
+          left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
+          right.getFamilyOffset() + familyCommonPrefix,
+          right.getFamilyLength() - familyCommonPrefix);
+      }
+    } else {
+      if (right instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
+        return ByteBufferUtils.compareTo(left.getFamilyArray(),
+          left.getFamilyOffset() + familyCommonPrefix, left.getFamilyLength() - familyCommonPrefix,
+          bbRight.getFamilyByteBuffer(), bbRight.getFamilyPosition() + familyCommonPrefix,
+          right.getFamilyLength() - familyCommonPrefix);
+      } else {
+        return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix,
+          left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
+          right.getFamilyOffset() + familyCommonPrefix,
+          right.getFamilyLength() - familyCommonPrefix);
+      }
+    }
   }
 
   public static int compareCommonQualifierPrefix(Cell left, Cell right, int qualCommonPrefix) {
-    return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix,
-      left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
-      right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength() - qualCommonPrefix);
+    if (left instanceof ByteBufferExtendedCell) {
+      ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
+      if (right instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
+        return ByteBufferUtils.compareTo(bbLeft.getQualifierByteBuffer(),
+          bbLeft.getQualifierPosition() + qualCommonPrefix,
+          left.getQualifierLength() - qualCommonPrefix, bbRight.getQualifierByteBuffer(),
+          bbRight.getQualifierPosition() + qualCommonPrefix,
+          right.getQualifierLength() - qualCommonPrefix);
+      } else {
+        return ByteBufferUtils.compareTo(bbLeft.getQualifierByteBuffer(),
+          bbLeft.getQualifierPosition() + qualCommonPrefix,
+          left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
+          right.getQualifierOffset() + qualCommonPrefix,
+          right.getQualifierLength() - qualCommonPrefix);
+      }
+    } else {
+      if (right instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
+        return ByteBufferUtils.compareTo(left.getQualifierArray(),
+          left.getQualifierOffset() + qualCommonPrefix,
+          left.getQualifierLength() - qualCommonPrefix, bbRight.getQualifierByteBuffer(),
+          bbRight.getQualifierPosition() + qualCommonPrefix,
+          right.getQualifierLength() - qualCommonPrefix);
+      } else {
+        return Bytes.compareTo(left.getQualifierArray(),
+          left.getQualifierOffset() + qualCommonPrefix,
+          left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
+          right.getQualifierOffset() + qualCommonPrefix,
+          right.getQualifierLength() - qualCommonPrefix);
+      }
+    }
   }
 
   protected static class SeekerState {
@@ -951,25 +1030,57 @@ abstract class BufferedDataBlockEncoder extends AbstractDataBlockEncoder {
       return 0;
     }
 
-    private static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) {
-      return Bytes.findCommonPrefix(left.getRowArray(), right.getRowArray(),
-        left.getRowLength() - rowCommonPrefix, right.getRowLength() - rowCommonPrefix,
-        left.getRowOffset() + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix);
+    // These findCommonPrefix* methods rely on the fact that keyOnlyKv is the "right" cell argument
+    // and always on-heap
+
+    private static int findCommonPrefixInRowPart(Cell left, KeyValue.KeyOnlyKeyValue right,
+      int rowCommonPrefix) {
+      if (left instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
+        return ByteBufferUtils.findCommonPrefix(bbLeft.getRowByteBuffer(),
+          bbLeft.getRowPosition() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix,
+          right.getRowArray(), right.getRowOffset() + rowCommonPrefix,
+          right.getRowLength() - rowCommonPrefix);
+      } else {
+        return Bytes.findCommonPrefix(left.getRowArray(), right.getRowArray(),
+          left.getRowLength() - rowCommonPrefix, right.getRowLength() - rowCommonPrefix,
+          left.getRowOffset() + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix);
+      }
     }
 
-    private static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) {
-      return Bytes.findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(),
-        left.getFamilyLength() - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix,
-        left.getFamilyOffset() + familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix);
+    private static int findCommonPrefixInFamilyPart(Cell left, KeyValue.KeyOnlyKeyValue right,
+      int familyCommonPrefix) {
+      if (left instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
+        return ByteBufferUtils.findCommonPrefix(bbLeft.getFamilyByteBuffer(),
+          bbLeft.getFamilyPosition() + familyCommonPrefix,
+          left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
+          right.getFamilyOffset() + familyCommonPrefix,
+          right.getFamilyLength() - familyCommonPrefix);
+      } else {
+        return Bytes.findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(),
+          left.getFamilyLength() - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix,
+          left.getFamilyOffset() + familyCommonPrefix,
+          right.getFamilyOffset() + familyCommonPrefix);
+      }
     }
 
-    private static int findCommonPrefixInQualifierPart(Cell left, Cell right,
+    private static int findCommonPrefixInQualifierPart(Cell left, KeyValue.KeyOnlyKeyValue right,
       int qualifierCommonPrefix) {
-      return Bytes.findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(),
-        left.getQualifierLength() - qualifierCommonPrefix,
-        right.getQualifierLength() - qualifierCommonPrefix,
-        left.getQualifierOffset() + qualifierCommonPrefix,
-        right.getQualifierOffset() + qualifierCommonPrefix);
+      if (left instanceof ByteBufferExtendedCell) {
+        ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
+        return ByteBufferUtils.findCommonPrefix(bbLeft.getQualifierByteBuffer(),
+          bbLeft.getQualifierPosition() + qualifierCommonPrefix,
+          left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierArray(),
+          right.getQualifierOffset() + qualifierCommonPrefix,
+          right.getQualifierLength() - qualifierCommonPrefix);
+      } else {
+        return Bytes.findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(),
+          left.getQualifierLength() - qualifierCommonPrefix,
+          right.getQualifierLength() - qualifierCommonPrefix,
+          left.getQualifierOffset() + qualifierCommonPrefix,
+          right.getQualifierOffset() + qualifierCommonPrefix);
+      }
     }
 
     private void moveToPrevious() {
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index 6c92ac1aac4..0ff639f021f 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -796,6 +796,30 @@ public final class ByteBufferUtils {
     return result;
   }
 
+  /**
+   * Find length of common prefix in two arrays.
+   * @param left        ByteBuffer to be compared.
+   * @param leftOffset  Offset in left ByteBuffer.
+   * @param leftLength  Length of left ByteBuffer.
+   * @param right       Array to be compared
+   * @param rightOffset Offset in right Array.
+   * @param rightLength Length of right Array.
+   */
+  public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
+    int rightOffset, int rightLength) {
+    int length = Math.min(leftLength, rightLength);
+    int result = 0;
+
+    while (
+      result < length
+        && ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result]
+    ) {
+      result++;
+    }
+
+    return result;
+  }
+
   /**
    * Check whether two parts in the same buffer are equal.
    * @param buffer      In which buffer there are parts
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java
index 9402c762f34..dc5c5e326f0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java
@@ -32,6 +32,7 @@ import java.util.List;
 import java.util.Random;
 import java.util.concurrent.ThreadLocalRandom;
 import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.ByteBufferKeyValue;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.CellUtil;
@@ -227,6 +228,59 @@ public class TestDataBlockEncoders {
     LOG.info("Done");
   }
 
+  @Test
+  public void testSeekingToOffHeapKeyValueInSample() throws IOException {
+    List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
+
+    // create all seekers
+    List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<>();
+    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
+      LOG.info("Encoding: " + encoding);
+      DataBlockEncoder encoder = encoding.getEncoder();
+      if (encoder == null) {
+        continue;
+      }
+      LOG.info("Encoder: " + encoder);
+      ByteBuffer encodedBuffer = encodeKeyValues(encoding, sampleKv,
+        getEncodingContext(Compression.Algorithm.NONE, encoding), this.useOffheapData);
+      HFileContext meta =
+        new HFileContextBuilder().withHBaseCheckSum(false).withIncludesMvcc(includesMemstoreTS)
+          .withIncludesTags(includesTags).withCompression(Compression.Algorithm.NONE).build();
+      DataBlockEncoder.EncodedSeeker seeker =
+        encoder.createSeeker(encoder.newDataBlockDecodingContext(meta));
+      seeker.setCurrentBuffer(new SingleByteBuff(encodedBuffer));
+      encodedSeekers.add(seeker);
+    }
+    LOG.info("Testing it!");
+    // test it!
+    // try a few random seeks
+    Random rand = ThreadLocalRandom.current();
+    for (boolean seekBefore : new boolean[] { false, true }) {
+      for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
+        int keyValueId;
+        if (!seekBefore) {
+          keyValueId = rand.nextInt(sampleKv.size());
+        } else {
+          keyValueId = rand.nextInt(sampleKv.size() - 1) + 1;
+        }
+
+        KeyValue keyValue = sampleKv.get(keyValueId);
+        checkSeekingConsistency(encodedSeekers, seekBefore, buildOffHeapKeyValue(keyValue));
+      }
+    }
+
+    // check edge cases
+    LOG.info("Checking edge cases");
+    checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
+    for (boolean seekBefore : new boolean[] { false, true }) {
+      checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
+      KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
+      Cell lastMidKv = PrivateCellUtil.createLastOnRowCol(midKv);
+      checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
+    }
+    LOG.info("Done");
+  }
+
   static ByteBuffer encodeKeyValues(DataBlockEncoding encoding, List<KeyValue> kvs,
     HFileBlockEncodingContext encodingContext, boolean useOffheapData) throws IOException {
     DataBlockEncoder encoder = encoding.getEncoder();
@@ -436,4 +490,15 @@ public class TestDataBlockEncoders {
     assertEquals("Encoding -> decoding gives different results for " + encoder,
       Bytes.toStringBinary(unencodedDataBuf), Bytes.toStringBinary(actualDataset));
   }
+
+  private static ByteBufferKeyValue buildOffHeapKeyValue(KeyValue keyValue) throws IOException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    keyValue.write(out, false);
+    byte[] bytes = out.toByteArray();
+    ByteBuffer bb = ByteBuffer.allocateDirect(bytes.length);
+    bb.put(bytes);
+    bb.flip();
+
+    return new ByteBufferKeyValue(bb, 0, bytes.length);
+  }
 }