You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bb...@apache.org on 2023/11/09 14:26:57 UTC

(hbase) branch branch-3 updated: HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.seekToPreviousRow (#5373)

This is an automated email from the ASF dual-hosted git repository.

bbeaudreault pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new 361bd5175f5 HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.seekToPreviousRow (#5373)
361bd5175f5 is described below

commit 361bd5175f53017c557ef47b6d6ee4c068753983
Author: jbewing <jb...@live.com>
AuthorDate: Thu Nov 9 09:14:02 2023 -0500

    HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.seekToPreviousRow (#5373)
    
    Signed-off-by: Bryan Beaudreault <bb...@apache.org>
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../apache/hadoop/hbase/PerformanceEvaluation.java |  45 ++++
 .../hadoop/hbase/regionserver/StoreFileReader.java |   4 +-
 .../hbase/regionserver/StoreFileScanner.java       | 240 +++++++++++++++++----
 .../hadoop/hbase/regionserver/TestHStoreFile.java  |   4 +-
 4 files changed, 251 insertions(+), 42 deletions(-)

diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
index 8fd2d5f7fb2..e0040c1f178 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -185,6 +185,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
     addCommandDescriptor(MetaWriteTest.class, "metaWrite",
       "Populate meta table;used with 1 thread; to be cleaned up by cleanMeta");
     addCommandDescriptor(ScanTest.class, "scan", "Run scan test (read every row)");
+    addCommandDescriptor(ReverseScanTest.class, "reverseScan",
+      "Run reverse scan test (read every row)");
     addCommandDescriptor(FilteredScanTest.class, "filterScan",
       "Run scan test using a filter to find a specific row based on it's value "
         + "(make sure to use --rows=20)");
@@ -2104,6 +2106,49 @@ public class PerformanceEvaluation extends Configured implements Tool {
     }
   }
 
+  static class ReverseScanTest extends TableTest {
+    private ResultScanner testScanner;
+
+    ReverseScanTest(Connection con, TestOptions options, Status status) {
+      super(con, options, status);
+    }
+
+    @Override
+    void testTakedown() throws IOException {
+      if (this.testScanner != null) {
+        this.testScanner.close();
+      }
+      super.testTakedown();
+    }
+
+    @Override
+    boolean testRow(final int i, final long startTime) throws IOException {
+      if (this.testScanner == null) {
+        Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
+          .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType)
+          .setScanMetricsEnabled(true).setReversed(true);
+        for (int family = 0; family < opts.families; family++) {
+          byte[] familyName = Bytes.toBytes(FAMILY_NAME_BASE + family);
+          if (opts.addColumns) {
+            for (int column = 0; column < opts.columns; column++) {
+              byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
+              scan.addColumn(familyName, qualifier);
+            }
+          } else {
+            scan.addFamily(familyName);
+          }
+        }
+        if (opts.filterAll) {
+          scan.setFilter(new FilterAllFilter());
+        }
+        this.testScanner = table.getScanner(scan);
+      }
+      Result r = testScanner.next();
+      updateValueSize(r);
+      return true;
+    }
+  }
+
   /**
    * Base class for operations that are CAS-like; that read a value and then set it based off what
    * they read. In this category is increment, append, checkAndPut, etc.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
index 72e93c3f75a..09c379227bd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.TimeRange;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.BloomFilterMetrics;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@@ -146,7 +147,8 @@ public class StoreFileReader {
   public StoreFileScanner getStoreFileScanner(boolean cacheBlocks, boolean pread,
     boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
     return new StoreFileScanner(this, getScanner(cacheBlocks, pread, isCompaction), !isCompaction,
-      reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn);
+      reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn,
+      reader.getDataBlockEncoding() == DataBlockEncoding.ROW_INDEX_V1);
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
index 5e666659c02..fd941de4df8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
@@ -61,6 +61,13 @@ public class StoreFileScanner implements KeyValueScanner {
   // A flag represents whether could stop skipping KeyValues for MVCC
   // if have encountered the next row. Only used for reversed scan
   private boolean stopSkippingKVsIfNextRow = false;
+  // A Cell that represents the row before the most previously seeked to row in seekToPreviousRow
+  private Cell previousRow = null;
+  // Whether the underlying HFile is using a data block encoding that has lower cost for seeking to
+  // a row from the beginning of a block (i.e. RIV1). If the data block encoding has a high cost for
+  // seeks, then we can use a modified reverse scanning algorithm to reduce seeks from the beginning
+  // of the block
+  private final boolean isFastSeekingEncoding;
 
   private static LongAdder seekCount;
 
@@ -83,9 +90,13 @@ public class StoreFileScanner implements KeyValueScanner {
    *                                    {@link KeyValueScanner#getScannerOrder()}.
    * @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
    *                                    otherwise {@code false}. This is a hint for optimization.
+   * @param isFastSeekingEncoding       {@code true} if the data block encoding can seek quickly
+   *                                    from the beginning of a block (i.e. RIV1), otherwise
+   *                                    {@code false}. This is a hint for optimization.
    */
   public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
-    boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
+    boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn,
+    boolean isFastSeekingEncoding) {
     this.readPt = readPt;
     this.reader = reader;
     this.hfs = hfs;
@@ -93,6 +104,7 @@ public class StoreFileScanner implements KeyValueScanner {
     this.hasMVCCInfo = hasMVCC;
     this.scannerOrder = scannerOrder;
     this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;
+    this.isFastSeekingEncoding = isFastSeekingEncoding;
     this.reader.incrementRefCount();
   }
 
@@ -226,6 +238,7 @@ public class StoreFileScanner implements KeyValueScanner {
         }
       } finally {
         realSeekDone = true;
+        previousRow = null;
       }
     } catch (FileNotFoundException e) {
       throw e;
@@ -253,6 +266,7 @@ public class StoreFileScanner implements KeyValueScanner {
         }
       } finally {
         realSeekDone = true;
+        previousRow = null;
       }
     } catch (FileNotFoundException e) {
       throw e;
@@ -486,50 +500,198 @@ public class StoreFileScanner implements KeyValueScanner {
   @Override
   public boolean seekToPreviousRow(Cell originalKey) throws IOException {
     try {
-      try {
-        boolean keepSeeking = false;
-        Cell key = originalKey;
-        do {
-          Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
-          if (seekCount != null) seekCount.increment();
-          if (!hfs.seekBefore(seekKey)) {
-            this.cur = null;
-            return false;
-          }
-          Cell curCell = hfs.getCell();
-          Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell);
-
-          if (seekCount != null) seekCount.increment();
-          if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
-            this.cur = null;
-            return false;
-          }
-
-          setCurrentCell(hfs.getCell());
-          this.stopSkippingKVsIfNextRow = true;
-          boolean resultOfSkipKVs;
-          try {
-            resultOfSkipKVs = skipKVsNewerThanReadpoint();
-          } finally {
-            this.stopSkippingKVsIfNextRow = false;
-          }
-          if (!resultOfSkipKVs || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) {
-            keepSeeking = true;
-            key = firstKeyOfPreviousRow;
-            continue;
-          } else {
-            keepSeeking = false;
-          }
-        } while (keepSeeking);
-        return true;
-      } finally {
-        realSeekDone = true;
+      if (isFastSeekingEncoding) {
+        return seekToPreviousRowStateless(originalKey);
+      } else if (previousRow == null || getComparator().compareRows(previousRow, originalKey) > 0) {
+        return seekToPreviousRowWithoutHint(originalKey);
+      } else {
+        return seekToPreviousRowWithHint();
       }
     } catch (FileNotFoundException e) {
       throw e;
     } catch (IOException ioe) {
       throw new IOException("Could not seekToPreviousRow " + this + " to key " + originalKey, ioe);
+    } finally {
+      this.realSeekDone = true;
+    }
+  }
+
+  /**
+   * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires one seek
+   * and one reseek. This method maintains state in {@link StoreFileScanner#previousRow} which only
+   * makes sense in the context of a sequential row-by-row reverse scan.
+   * {@link StoreFileScanner#previousRow} should be reset if that is not the case. The reasoning for
+   * why this method is faster than {@link StoreFileScanner#seekToPreviousRowStateless(Cell)} is
+   * that seeks are slower as they need to start from the beginning of the file, while reseeks go
+   * forward from the current position.
+   */
+  private boolean seekToPreviousRowWithHint() throws IOException {
+    do {
+      // Using our existing seek hint, set our next seek hint
+      Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(previousRow);
+      seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);
+
+      // Reseek back to our initial seek hint (i.e. what we think is the start of the
+      // previous row)
+      if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+        return false;
+      }
+
+      // If after skipping newer Kvs, we're still in our seek hint row, then we're finished
+      if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+        return true;
+      }
+
+      // If the previousRow seek hint is missing, that means that we're at row after the first row
+      // in the storefile. Use the without-hint seek path to process the final row
+      if (previousRow == null) {
+        return seekToPreviousRowWithoutHint(firstKeyOfPreviousRow);
+      }
+
+      // Otherwise, use the previousRow seek hint to continue traversing backwards
+    } while (true);
+  }
+
+  /**
+   * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks
+   * and one reseek. The extra expense/seek is with the intent of speeding up subsequent calls by
+   * using the {@link StoreFileScanner#seekToPreviousRowWithHint} which this method seeds the state
+   * for by setting {@link StoreFileScanner#previousRow}
+   */
+  private boolean seekToPreviousRowWithoutHint(Cell originalKey) throws IOException {
+    // Rewind to the cell before the beginning of this row
+    Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(originalKey);
+    if (!seekBefore(keyAtBeginningOfRow)) {
+      return false;
+    }
+
+    // Rewind before this row and save what we find as a seek hint
+    Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
+    seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);
+
+    // Seek back to the start of the previous row
+    if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+      return false;
+    }
+
+    // If after skipping newer Kvs, we're still in what we thought was the previous
+    // row, then we can exit
+    if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+      return true;
+    }
+
+    // Skipping newer kvs resulted in skipping the entire row that we thought was the
+    // previous row. If we've set a seek hint, then we can use that to go backwards
+    // further
+    if (previousRow != null) {
+      return seekToPreviousRowWithHint();
+    }
+
+    // If we've made it here, then we weren't able to set a seek hint. This can happen
+    // only if we're at the beginning of the storefile i.e. there is no row before this
+    // one
+    return false;
+  }
+
+  /**
+   * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks.
+   * It should be used if the cost for seeking is lower i.e. when using a fast seeking data block
+   * encoding like RIV1.
+   */
+  private boolean seekToPreviousRowStateless(Cell originalKey) throws IOException {
+    Cell key = originalKey;
+    do {
+      Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(key);
+      if (!seekBefore(keyAtBeginningOfRow)) {
+        return false;
+      }
+
+      Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
+      if (!seekAtOrAfter(firstKeyOfPreviousRow)) {
+        return false;
+      }
+
+      if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+        return true;
+      }
+      key = firstKeyOfPreviousRow;
+    } while (true);
+  }
+
+  private boolean seekBefore(Cell seekKey) throws IOException {
+    if (seekCount != null) {
+      seekCount.increment();
     }
+    if (!hfs.seekBefore(seekKey)) {
+      this.cur = null;
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Seeks before the seek target cell and saves the location to {@link #previousRow}. If there
+   * doesn't exist a KV in this file before the seek target cell, reposition the scanner at the
+   * beginning of the storefile (in preparation to a reseek at or after the seek key) and set the
+   * {@link #previousRow} to null. If {@link #previousRow} is ever non-null and then transitions to
+   * being null again via this method, that's because there doesn't exist a row before the seek
+   * target in the storefile (i.e. we're at the beginning of the storefile)
+   */
+  private void seekBeforeAndSaveKeyToPreviousRow(Cell seekKey) throws IOException {
+    if (seekCount != null) {
+      seekCount.increment();
+    }
+    if (!hfs.seekBefore(seekKey)) {
+      // Since the above seek failed, we need to position ourselves back at the start of the
+      // block or else our reseek might fail. seekTo() cannot return false here as at least
+      // one seekBefore will have returned true by the time we get here
+      hfs.seekTo();
+      this.previousRow = null;
+    } else {
+      this.previousRow = hfs.getCell();
+    }
+  }
+
+  private boolean seekAtOrAfter(Cell seekKey) throws IOException {
+    if (seekCount != null) {
+      seekCount.increment();
+    }
+    if (!seekAtOrAfter(hfs, seekKey)) {
+      this.cur = null;
+      return false;
+    }
+
+    return true;
+  }
+
+  private boolean reseekAtOrAfter(Cell seekKey) throws IOException {
+    if (seekCount != null) {
+      seekCount.increment();
+    }
+    if (!reseekAtOrAfter(hfs, seekKey)) {
+      this.cur = null;
+      return false;
+    }
+
+    return true;
+  }
+
+  private boolean isStillAtSeekTargetAfterSkippingNewerKvs(Cell seekKey) throws IOException {
+    setCurrentCell(hfs.getCell());
+    return skipKvsNewerThanReadpointReversed() && getComparator().compareRows(cur, seekKey) <= 0;
+  }
+
+  private boolean skipKvsNewerThanReadpointReversed() throws IOException {
+    this.stopSkippingKVsIfNextRow = true;
+    boolean resultOfSkipKVs;
+    try {
+      resultOfSkipKVs = skipKVsNewerThanReadpoint();
+    } finally {
+      this.stopSkippingKVsIfNextRow = false;
+    }
+
+    return resultOfSkipKVs;
   }
 
   @Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
index a0c23af5ef0..aa7fb53566d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
@@ -280,7 +280,7 @@ public class TestHStoreFile {
     StoreFileReader r = file.getReader();
     assertNotNull(r);
     StoreFileScanner scanner =
-      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false);
+      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);
 
     // Verify after instantiating scanner refCount is increased
     assertTrue("Verify file is being referenced", file.isReferencedInReads());
@@ -297,7 +297,7 @@ public class TestHStoreFile {
     ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
     when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
     try (StoreFileScanner scanner =
-      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true)) {
+      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
       Scan scan = new Scan();
       scan.setColumnFamilyTimeRange(cf, 0, 1);
       assertFalse(scanner.shouldUseScanner(scan, store, 0));