You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bb...@apache.org on 2023/11/09 14:26:57 UTC
(hbase) branch branch-3 updated: HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.seekToPreviousRow (#5373)
This is an automated email from the ASF dual-hosted git repository.
bbeaudreault pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-3 by this push:
new 361bd5175f5 HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.seekToPreviousRow (#5373)
361bd5175f5 is described below
commit 361bd5175f53017c557ef47b6d6ee4c068753983
Author: jbewing <jb...@live.com>
AuthorDate: Thu Nov 9 09:14:02 2023 -0500
HBASE-28043 Reduce seeks from beginning of block in StoreFileScanner.seekToPreviousRow (#5373)
Signed-off-by: Bryan Beaudreault <bb...@apache.org>
Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
.../apache/hadoop/hbase/PerformanceEvaluation.java | 45 ++++
.../hadoop/hbase/regionserver/StoreFileReader.java | 4 +-
.../hbase/regionserver/StoreFileScanner.java | 240 +++++++++++++++++----
.../hadoop/hbase/regionserver/TestHStoreFile.java | 4 +-
4 files changed, 251 insertions(+), 42 deletions(-)
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
index 8fd2d5f7fb2..e0040c1f178 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -185,6 +185,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
addCommandDescriptor(MetaWriteTest.class, "metaWrite",
"Populate meta table;used with 1 thread; to be cleaned up by cleanMeta");
addCommandDescriptor(ScanTest.class, "scan", "Run scan test (read every row)");
+ addCommandDescriptor(ReverseScanTest.class, "reverseScan",
+ "Run reverse scan test (read every row)");
addCommandDescriptor(FilteredScanTest.class, "filterScan",
"Run scan test using a filter to find a specific row based on it's value "
+ "(make sure to use --rows=20)");
@@ -2104,6 +2106,49 @@ public class PerformanceEvaluation extends Configured implements Tool {
}
}
+ static class ReverseScanTest extends TableTest {
+ private ResultScanner testScanner;
+
+ ReverseScanTest(Connection con, TestOptions options, Status status) {
+ super(con, options, status);
+ }
+
+ @Override
+ void testTakedown() throws IOException {
+ if (this.testScanner != null) {
+ this.testScanner.close();
+ }
+ super.testTakedown();
+ }
+
+ @Override
+ boolean testRow(final int i, final long startTime) throws IOException {
+ if (this.testScanner == null) {
+ Scan scan = new Scan().setCaching(opts.caching).setCacheBlocks(opts.cacheBlocks)
+ .setAsyncPrefetch(opts.asyncPrefetch).setReadType(opts.scanReadType)
+ .setScanMetricsEnabled(true).setReversed(true);
+ for (int family = 0; family < opts.families; family++) {
+ byte[] familyName = Bytes.toBytes(FAMILY_NAME_BASE + family);
+ if (opts.addColumns) {
+ for (int column = 0; column < opts.columns; column++) {
+ byte[] qualifier = column == 0 ? COLUMN_ZERO : Bytes.toBytes("" + column);
+ scan.addColumn(familyName, qualifier);
+ }
+ } else {
+ scan.addFamily(familyName);
+ }
+ }
+ if (opts.filterAll) {
+ scan.setFilter(new FilterAllFilter());
+ }
+ this.testScanner = table.getScanner(scan);
+ }
+ Result r = testScanner.next();
+ updateValueSize(r);
+ return true;
+ }
+ }
+
/**
* Base class for operations that are CAS-like; that read a value and then set it based off what
* they read. In this category is increment, append, checkAndPut, etc.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
index 72e93c3f75a..09c379227bd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.TimeRange;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.BloomFilterMetrics;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@@ -146,7 +147,8 @@ public class StoreFileReader {
public StoreFileScanner getStoreFileScanner(boolean cacheBlocks, boolean pread,
boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
return new StoreFileScanner(this, getScanner(cacheBlocks, pread, isCompaction), !isCompaction,
- reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn);
+ reader.hasMVCCInfo(), readPt, scannerOrder, canOptimizeForNonNullColumn,
+ reader.getDataBlockEncoding() == DataBlockEncoding.ROW_INDEX_V1);
}
/**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
index 5e666659c02..fd941de4df8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
@@ -61,6 +61,13 @@ public class StoreFileScanner implements KeyValueScanner {
// A flag represents whether could stop skipping KeyValues for MVCC
// if have encountered the next row. Only used for reversed scan
private boolean stopSkippingKVsIfNextRow = false;
+ // A Cell that represents the row before the most previously seeked to row in seekToPreviousRow
+ private Cell previousRow = null;
+ // Whether the underlying HFile is using a data block encoding that has lower cost for seeking to
+ // a row from the beginning of a block (i.e. RIV1). If the data block encoding has a high cost for
+ // seeks, then we can use a modified reverse scanning algorithm to reduce seeks from the beginning
+ // of the block
+ private final boolean isFastSeekingEncoding;
private static LongAdder seekCount;
@@ -83,9 +90,13 @@ public class StoreFileScanner implements KeyValueScanner {
* {@link KeyValueScanner#getScannerOrder()}.
* @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
* otherwise {@code false}. This is a hint for optimization.
+ * @param isFastSeekingEncoding {@code true} if the data block encoding can seek quickly
+ * from the beginning of a block (i.e. RIV1), otherwise
+ * {@code false}. This is a hint for optimization.
*/
public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
- boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
+ boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn,
+ boolean isFastSeekingEncoding) {
this.readPt = readPt;
this.reader = reader;
this.hfs = hfs;
@@ -93,6 +104,7 @@ public class StoreFileScanner implements KeyValueScanner {
this.hasMVCCInfo = hasMVCC;
this.scannerOrder = scannerOrder;
this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;
+ this.isFastSeekingEncoding = isFastSeekingEncoding;
this.reader.incrementRefCount();
}
@@ -226,6 +238,7 @@ public class StoreFileScanner implements KeyValueScanner {
}
} finally {
realSeekDone = true;
+ previousRow = null;
}
} catch (FileNotFoundException e) {
throw e;
@@ -253,6 +266,7 @@ public class StoreFileScanner implements KeyValueScanner {
}
} finally {
realSeekDone = true;
+ previousRow = null;
}
} catch (FileNotFoundException e) {
throw e;
@@ -486,50 +500,198 @@ public class StoreFileScanner implements KeyValueScanner {
@Override
public boolean seekToPreviousRow(Cell originalKey) throws IOException {
try {
- try {
- boolean keepSeeking = false;
- Cell key = originalKey;
- do {
- Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
- if (seekCount != null) seekCount.increment();
- if (!hfs.seekBefore(seekKey)) {
- this.cur = null;
- return false;
- }
- Cell curCell = hfs.getCell();
- Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell);
-
- if (seekCount != null) seekCount.increment();
- if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
- this.cur = null;
- return false;
- }
-
- setCurrentCell(hfs.getCell());
- this.stopSkippingKVsIfNextRow = true;
- boolean resultOfSkipKVs;
- try {
- resultOfSkipKVs = skipKVsNewerThanReadpoint();
- } finally {
- this.stopSkippingKVsIfNextRow = false;
- }
- if (!resultOfSkipKVs || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) {
- keepSeeking = true;
- key = firstKeyOfPreviousRow;
- continue;
- } else {
- keepSeeking = false;
- }
- } while (keepSeeking);
- return true;
- } finally {
- realSeekDone = true;
+ if (isFastSeekingEncoding) {
+ return seekToPreviousRowStateless(originalKey);
+ } else if (previousRow == null || getComparator().compareRows(previousRow, originalKey) > 0) {
+ return seekToPreviousRowWithoutHint(originalKey);
+ } else {
+ return seekToPreviousRowWithHint();
}
} catch (FileNotFoundException e) {
throw e;
} catch (IOException ioe) {
throw new IOException("Could not seekToPreviousRow " + this + " to key " + originalKey, ioe);
+ } finally {
+ this.realSeekDone = true;
+ }
+ }
+
+ /**
+ * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires one seek
+ * and one reseek. This method maintains state in {@link StoreFileScanner#previousRow} which only
+ * makes sense in the context of a sequential row-by-row reverse scan.
+ * {@link StoreFileScanner#previousRow} should be reset if that is not the case. The reasoning for
+ * why this method is faster than {@link StoreFileScanner#seekToPreviousRowStateless(Cell)} is
+ * that seeks are slower as they need to start from the beginning of the file, while reseeks go
+ * forward from the current position.
+ */
+ private boolean seekToPreviousRowWithHint() throws IOException {
+ do {
+ // Using our existing seek hint, set our next seek hint
+ Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(previousRow);
+ seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);
+
+ // Reseek back to our initial seek hint (i.e. what we think is the start of the
+ // previous row)
+ if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ // If after skipping newer Kvs, we're still in our seek hint row, then we're finished
+ if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+ return true;
+ }
+
+ // If the previousRow seek hint is missing, that means that we're at row after the first row
+ // in the storefile. Use the without-hint seek path to process the final row
+ if (previousRow == null) {
+ return seekToPreviousRowWithoutHint(firstKeyOfPreviousRow);
+ }
+
+ // Otherwise, use the previousRow seek hint to continue traversing backwards
+ } while (true);
+ }
+
+ /**
+ * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks
+ * and one reseek. The extra expense/seek is with the intent of speeding up subsequent calls by
+ * using the {@link StoreFileScanner#seekToPreviousRowWithHint} which this method seeds the state
+ * for by setting {@link StoreFileScanner#previousRow}
+ */
+ private boolean seekToPreviousRowWithoutHint(Cell originalKey) throws IOException {
+ // Rewind to the cell before the beginning of this row
+ Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(originalKey);
+ if (!seekBefore(keyAtBeginningOfRow)) {
+ return false;
+ }
+
+ // Rewind before this row and save what we find as a seek hint
+ Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
+ seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);
+
+ // Seek back to the start of the previous row
+ if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ // If after skipping newer Kvs, we're still in what we thought was the previous
+ // row, then we can exit
+ if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+ return true;
+ }
+
+ // Skipping newer kvs resulted in skipping the entire row that we thought was the
+ // previous row. If we've set a seek hint, then we can use that to go backwards
+ // further
+ if (previousRow != null) {
+ return seekToPreviousRowWithHint();
+ }
+
+ // If we've made it here, then we weren't able to set a seek hint. This can happen
+ // only if we're at the beginning of the storefile i.e. there is no row before this
+ // one
+ return false;
+ }
+
+ /**
+ * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks.
+ * It should be used if the cost for seeking is lower i.e. when using a fast seeking data block
+ * encoding like RIV1.
+ */
+ private boolean seekToPreviousRowStateless(Cell originalKey) throws IOException {
+ Cell key = originalKey;
+ do {
+ Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(key);
+ if (!seekBefore(keyAtBeginningOfRow)) {
+ return false;
+ }
+
+ Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
+ if (!seekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+ return true;
+ }
+ key = firstKeyOfPreviousRow;
+ } while (true);
+ }
+
+ private boolean seekBefore(Cell seekKey) throws IOException {
+ if (seekCount != null) {
+ seekCount.increment();
}
+ if (!hfs.seekBefore(seekKey)) {
+ this.cur = null;
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Seeks before the seek target cell and saves the location to {@link #previousRow}. If there
+ * doesn't exist a KV in this file before the seek target cell, reposition the scanner at the
+ * beginning of the storefile (in preparation to a reseek at or after the seek key) and set the
+ * {@link #previousRow} to null. If {@link #previousRow} is ever non-null and then transitions to
+ * being null again via this method, that's because there doesn't exist a row before the seek
+ * target in the storefile (i.e. we're at the beginning of the storefile)
+ */
+ private void seekBeforeAndSaveKeyToPreviousRow(Cell seekKey) throws IOException {
+ if (seekCount != null) {
+ seekCount.increment();
+ }
+ if (!hfs.seekBefore(seekKey)) {
+ // Since the above seek failed, we need to position ourselves back at the start of the
+ // block or else our reseek might fail. seekTo() cannot return false here as at least
+ // one seekBefore will have returned true by the time we get here
+ hfs.seekTo();
+ this.previousRow = null;
+ } else {
+ this.previousRow = hfs.getCell();
+ }
+ }
+
+ private boolean seekAtOrAfter(Cell seekKey) throws IOException {
+ if (seekCount != null) {
+ seekCount.increment();
+ }
+ if (!seekAtOrAfter(hfs, seekKey)) {
+ this.cur = null;
+ return false;
+ }
+
+ return true;
+ }
+
+ private boolean reseekAtOrAfter(Cell seekKey) throws IOException {
+ if (seekCount != null) {
+ seekCount.increment();
+ }
+ if (!reseekAtOrAfter(hfs, seekKey)) {
+ this.cur = null;
+ return false;
+ }
+
+ return true;
+ }
+
+ private boolean isStillAtSeekTargetAfterSkippingNewerKvs(Cell seekKey) throws IOException {
+ setCurrentCell(hfs.getCell());
+ return skipKvsNewerThanReadpointReversed() && getComparator().compareRows(cur, seekKey) <= 0;
+ }
+
+ private boolean skipKvsNewerThanReadpointReversed() throws IOException {
+ this.stopSkippingKVsIfNextRow = true;
+ boolean resultOfSkipKVs;
+ try {
+ resultOfSkipKVs = skipKVsNewerThanReadpoint();
+ } finally {
+ this.stopSkippingKVsIfNextRow = false;
+ }
+
+ return resultOfSkipKVs;
}
@Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
index a0c23af5ef0..aa7fb53566d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
@@ -280,7 +280,7 @@ public class TestHStoreFile {
StoreFileReader r = file.getReader();
assertNotNull(r);
StoreFileScanner scanner =
- new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false);
+ new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);
// Verify after instantiating scanner refCount is increased
assertTrue("Verify file is being referenced", file.isReferencedInReads());
@@ -297,7 +297,7 @@ public class TestHStoreFile {
ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
try (StoreFileScanner scanner =
- new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true)) {
+ new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
Scan scan = new Scan();
scan.setColumnFamilyTimeRange(cf, 0, 1);
assertFalse(scanner.shouldUseScanner(scan, store, 0));