You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2021/08/31 18:21:22 UTC
[hbase] 01/01: HBASE-23584 cache filestatus in storefileinfo
This is an automated email from the ASF dual-hosted git repository.
stack pushed a commit to branch HBASE-23584.master
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit b9a425b73b7a5fb654ee14c2425cf1b8b49640f1
Author: yuhuiyang <yu...@qianxin.com>
AuthorDate: Tue Aug 31 10:47:16 2021 -0700
HBASE-23584 cache filestatus in storefileinfo
---
.../apache/hadoop/hbase/regionserver/HRegion.java | 3 +-
.../hadoop/hbase/regionserver/StoreFileInfo.java | 246 +++++++++++----------
2 files changed, 127 insertions(+), 122 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 22e3901..4837528 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -1291,8 +1291,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// Only construct StoreFileInfo object if its not a hfile, save obj
// creation
StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
- hdfsBlocksDistribution.add(storeFileInfo
- .computeHDFSBlocksDistribution(fs));
+ hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution());
} else if (StoreFileInfo.isHFile(p)) {
// If its a HFile, then lets just add to the block distribution
// lets not create more objects here, not even another HDFSBlocksDistribution
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
index 5eaab23..c2f5700 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
@@ -1,5 +1,4 @@
-/**
- *
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -18,13 +17,11 @@
*/
package org.apache.hadoop.hbase.regionserver;
-
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -46,7 +43,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Describe a StoreFile (hfile, reference, link)
+ * StoreFile info.
+ * The info could be for a plain storefile/hfile or it could be a reference or link to a storefile.
*/
@InterfaceAudience.Private
public class StoreFileInfo {
@@ -112,6 +110,16 @@ public class StoreFileInfo {
final AtomicInteger refCount = new AtomicInteger(0);
/**
+ * Cached fileStatus.
+ * Used selectively. Cache the FileStatus if this StoreFileInfo is for a plain StoreFile.
+ * Save on having to go to the filesystem every time (costly). We cannot cache FileStatus if this
+ * StoreFileInfo is for a link or for a reference that might in turn be to a link. The file behind
+ * a link can move during the lifetime of this StoreFileInfo invalidating what we might have
+ * cached here; do a lookup of FileStatus every time when a link to be safe.
+ */
+ private FileStatus cachedFileStatus = null;
+
+ /**
* Create a Store File Info
* @param conf the {@link Configuration} to use
* @param fs The current file system to use.
@@ -135,37 +143,32 @@ public class StoreFileInfo {
this.primaryReplica = primaryReplica;
this.noReadahead = this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD,
DEFAULT_STORE_FILE_READER_NO_READAHEAD);
- Path p = initialPath;
- if (HFileLink.isHFileLink(p)) {
- // HFileLink
+ if (HFileLink.isHFileLink(initialPath)) {
this.reference = null;
- this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
- LOG.trace("{} is a link", p);
- } else if (isReference(p)) {
- this.reference = Reference.read(fs, p);
- Path referencePath = getReferredToFile(p);
- if (HFileLink.isHFileLink(referencePath)) {
- // HFileLink Reference
- this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
- } else {
- // Reference
- this.link = null;
- }
- LOG.trace("{} is a {} reference to {}", p, reference.getFileRegion(), referencePath);
- } else if (isHFile(p) || isMobFile(p) || isMobRefFile(p)) {
- // HFile
- if (fileStatus != null) {
- this.createdTimestamp = fileStatus.getModificationTime();
- this.size = fileStatus.getLen();
- } else {
- FileStatus fStatus = fs.getFileStatus(initialPath);
- this.createdTimestamp = fStatus.getModificationTime();
- this.size = fStatus.getLen();
- }
+ this.link = HFileLink.buildFromHFileLinkPattern(conf, initialPath);
+ LOG.trace("{} is a link", initialPath);
+ } else if (isReference(initialPath)) {
+ this.reference = Reference.read(fs, initialPath);
+ Path referencedPath = getReferredToFile(initialPath);
+ // Check if the referenced file is a link.
+ this.link = HFileLink.isHFileLink(referencedPath)?
+ HFileLink.buildFromHFileLinkPattern(conf, referencedPath): null;
+ LOG.trace("{} is a {} reference to {} (link={})",
+ initialPath, reference.getFileRegion(), referencedPath, link == null);
+ } else if (isHFile(initialPath) || isMobFile(initialPath) || isMobRefFile(initialPath)) {
+ // Safe to cache passed filestatus when NOT a link or reference; i.e. when it filestatus on
+ // a plain storefile/hfile.
+ assert fileStatus != null && fileStatus.getPath().equals(initialPath);
+ this.cachedFileStatus = fileStatus != null?
+ fileStatus: this.fs.getFileStatus(this.initialPath);
+ this.createdTimestamp = this.cachedFileStatus.getModificationTime();
+ this.size = this.cachedFileStatus.getLen();
this.reference = null;
this.link = null;
+ LOG.trace("{}", initialPath);
} else {
- throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
+ throw new IOException("Path=" + initialPath + " doesn't look like a valid StoreFile; " +
+ "it is not a link, a reference or an hfile.");
}
}
@@ -213,6 +216,8 @@ public class StoreFileInfo {
*/
public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
final Reference reference, final HFileLink link) {
+ assert fs != null;
+ assert conf != null;
this.fs = fs;
this.conf = conf;
this.primaryReplica = false;
@@ -234,7 +239,6 @@ public class StoreFileInfo {
/**
* Sets the region coprocessor env.
- * @param coprocessorHost
*/
public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
this.coprocessorHost = coprocessorHost;
@@ -270,13 +274,9 @@ public class StoreFileInfo {
}
StoreFileReader createReader(ReaderContext context, CacheConfig cacheConf) throws IOException {
- StoreFileReader reader = null;
- if (this.reference != null) {
- reader = new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, refCount, conf);
- } else {
- reader = new StoreFileReader(context, hfileInfo, cacheConf, refCount, conf);
- }
- return reader;
+ return this.reference != null?
+ new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, refCount, conf):
+ new StoreFileReader(context, hfileInfo, cacheConf, refCount, conf);
}
ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type)
@@ -293,22 +293,21 @@ public class StoreFileInfo {
try {
in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
} catch (FileNotFoundException fnfe) {
- // Intercept the exception so can insert more info about the Reference; otherwise
- // exception just complains about some random file -- operator doesn't realize it
- // other end of a Reference
- FileNotFoundException newFnfe = new FileNotFoundException(toString());
- newFnfe.initCause(fnfe);
- throw newFnfe;
+ throw decorateFileNotFoundException(fnfe);
}
status = fs.getFileStatus(referencePath);
} else {
in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
- status = fs.getFileStatus(initialPath);
+ if (this.cachedFileStatus == null) {
+ // Safe to cache filestatus for a plain storefile/hfile.
+ this.cachedFileStatus = this.fs.getFileStatus(this.initialPath);
+ }
+ status = this.cachedFileStatus;
}
long length = status.getLen();
- ReaderContextBuilder contextBuilder =
- new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length)
- .withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).withFileSystem(fs);
+ ReaderContextBuilder contextBuilder = new ReaderContextBuilder().withInputStreamWrapper(in).
+ withFileSize(length).withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).
+ withFileSystem(fs);
if (this.reference != null) {
contextBuilder.withFilePath(this.getPath());
} else {
@@ -320,76 +319,76 @@ public class StoreFileInfo {
/**
* Compute the HDFS Block Distribution for this StoreFile
*/
- public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
- throws IOException {
- // guard against the case where we get the FileStatus from link, but by the time we
- // call compute the file is moved again
+ public HDFSBlocksDistribution computeHDFSBlocksDistribution() throws IOException {
if (this.link != null) {
+ // Guard against case where file behind link has moved when we go to calculate distribution;
+ // e.g. from data dir to archive dir. Retry up to number of locations under the link.
FileNotFoundException exToThrow = null;
for (int i = 0; i < this.link.getLocations().length; i++) {
try {
- return computeHDFSBlocksDistributionInternal(fs);
- } catch (FileNotFoundException ex) {
- // try the other location
- exToThrow = ex;
+ return computeHDFSBlocksDistributionInternal();
+ } catch (FileNotFoundException fnfe) {
+ // Try the other locations -- file behind link may have moved.
+ exToThrow = fnfe;
}
}
- throw exToThrow;
+ throw decorateFileNotFoundException(exToThrow);
} else {
- return computeHDFSBlocksDistributionInternal(fs);
+ return computeHDFSBlocksDistributionInternal();
}
}
- private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
- throws IOException {
- FileStatus status = getReferencedFileStatus(fs);
+ private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal() throws IOException {
+ FileStatus status = getFileStatus();
if (this.reference != null) {
- return computeRefFileHDFSBlockDistribution(fs, reference, status);
+ return computeRefFileHDFSBlockDistribution(status);
} else {
- return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
+ return FSUtils.computeHDFSBlocksDistribution(this.fs, status, 0, status.getLen());
}
}
/**
- * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
- * @param fs The current file system to use.
+ * Get the {@link FileStatus} of the file referenced by this StoreFileInfo on the passed
+ * <code>fs</code>.
+ * This {@link StoreFileInfo} could be for a link or a reference or a plain hfile/storefile; get
+ * the filestatus for whatever the link or reference points to (or just the plain hfile/storefile
+ * if not a link/reference). This method is called by snapshot. Filesystem may not be same as
+ * that of this hosting {@link StoreFileInfo}. If a link, when you go to use the passed
+ * FileStatus, the file may have moved; e.g. from data to archive... be aware.
+ * @param fs The filesystem to use.
* @return The {@link FileStatus} of the file referenced by this StoreFileInfo
*/
public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
+ if (this.fs.equals(fs) && this.cachedFileStatus != null) {
+ return this.cachedFileStatus;
+ }
FileStatus status;
if (this.reference != null) {
if (this.link != null) {
- FileNotFoundException exToThrow = null;
- for (int i = 0; i < this.link.getLocations().length; i++) {
- // HFileLink Reference
- try {
- return link.getFileStatus(fs);
- } catch (FileNotFoundException ex) {
- // try the other location
- exToThrow = ex;
- }
- }
- throw exToThrow;
+ status = this.link.getFileStatus(this.fs);
} else {
- // HFile Reference
- Path referencePath = getReferredToFile(this.getPath());
- status = fs.getFileStatus(referencePath);
+ try {
+ Path referencePath = getReferredToFile(this.getPath());
+ status = this.fs.getFileStatus(referencePath);
+ } catch (FileNotFoundException ex) {
+ throw decorateFileNotFoundException(ex);
+ }
}
} else {
- if (this.link != null) {
- FileNotFoundException exToThrow = null;
- for (int i = 0; i < this.link.getLocations().length; i++) {
- // HFileLink
- try {
- return link.getFileStatus(fs);
- } catch (FileNotFoundException ex) {
- // try the other location
- exToThrow = ex;
+ try {
+ if (this.link != null) {
+ status = this.link.getFileStatus(this.fs);
+ } else {
+ status = this.fs.getFileStatus(this.initialPath);
+ if (this.fs.equals(fs)) {
+ // If the passed in filestatus is same as that of this StoreFileStatus, take this
+ // opportunity to cache the filestatus. It is safe to cache passed filestatus when NOT a
+ // link or reference; i.e. when it filestatus on a plain storefile/hfile.
+ this.cachedFileStatus = status;
}
}
- throw exToThrow;
- } else {
- status = fs.getFileStatus(initialPath);
+ } catch (FileNotFoundException fnfe) {
+ throw decorateFileNotFoundException(fnfe);
}
}
return status;
@@ -400,9 +399,22 @@ public class StoreFileInfo {
return initialPath;
}
- /** @return The {@link FileStatus} of the file */
+ /**
+ * @return A new FNFE with <param>fnfe</param> as cause but including info if reference or link.
+ */
+ private FileNotFoundException decorateFileNotFoundException(FileNotFoundException fnfe) {
+ FileNotFoundException newFnfe = new FileNotFoundException(toString());
+ newFnfe.initCause(fnfe);
+ return newFnfe;
+ }
+
+ /**
+ * @return {@link FileStatus} for the linked or referenced file or if not a link/reference, then
+ * the FileStatus for the plain storefile (Be aware, if a link, the file may have moved
+ * by the time you go to use the FileStatus).
+ */
public FileStatus getFileStatus() throws IOException {
- return getReferencedFileStatus(fs);
+ return getReferencedFileStatus(this.fs);
}
/** @return Get the modification time of the file. */
@@ -412,8 +424,8 @@ public class StoreFileInfo {
@Override
public String toString() {
- return this.getPath()
- + (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
+ return isLink()? this.link.toString(): this.getPath() +
+ (isReference()? "->" + getReferredToFile(this.getPath()) + "-" + reference: "");
}
/**
@@ -523,10 +535,7 @@ public class StoreFileInfo {
* @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
*/
public static boolean validateStoreFileName(final String fileName) {
- if (HFileLink.isHFileLink(fileName) || isReference(fileName)) {
- return true;
- }
- return !fileName.contains("-");
+ return (HFileLink.isHFileLink(fileName) || isReference(fileName)) || !fileName.contains("-");
}
/**
@@ -553,29 +562,27 @@ public class StoreFileInfo {
}
/**
- * helper function to compute HDFS blocks distribution of a given reference file.For reference
- * file, we don't compute the exact value. We use some estimate instead given it might be good
- * enough. we assume bottom part takes the first half of reference file, top part takes the second
+ * Helper function to compute HDFS blocks distribution of a given reference file. For reference
+ * file, we don't compute the exact value. We use an estimate instead presuming it good enough.
+ * We assume bottom part takes the first half of a reference file, top part takes the second
* half of the reference file. This is just estimate, given midkey ofregion != midkey of HFile,
* also the number and size of keys vary. If this estimate isn't good enough, we can improve it
* later.
- * @param fs The FileSystem
- * @param reference The reference
- * @param status The reference FileStatus
+ * @param status The reference FileStatus
* @return HDFS blocks distribution
*/
- private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs,
- final Reference reference, final FileStatus status) throws IOException {
+ private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileStatus status)
+ throws IOException {
if (status == null) {
return null;
}
- long start = 0;
- long length = 0;
+ long start;
+ long length;
- if (Reference.isTopFileRegion(reference.getFileRegion())) {
+ if (Reference.isTopFileRegion(this.reference.getFileRegion())) {
start = status.getLen() / 2;
- length = status.getLen() - status.getLen() / 2;
+ length = status.getLen() - (status.getLen() / 2);
} else {
start = 0;
length = status.getLen() / 2;
@@ -596,15 +603,15 @@ public class StoreFileInfo {
return false;
}
- StoreFileInfo o = (StoreFileInfo) that;
+ StoreFileInfo o = (StoreFileInfo)that;
if (initialPath != null && o.initialPath == null) {
return false;
}
if (initialPath == null && o.initialPath != null) {
return false;
}
- if (initialPath != o.initialPath && initialPath != null
- && !initialPath.equals(o.initialPath)) {
+ if (initialPath != o.initialPath && initialPath != null &&
+ !initialPath.equals(o.initialPath)) {
return false;
}
if (reference != null && o.reference == null) {
@@ -613,8 +620,8 @@ public class StoreFileInfo {
if (reference == null && o.reference != null) {
return false;
}
- if (reference != o.reference && reference != null
- && !reference.equals(o.reference)) {
+ if (reference != o.reference && reference != null &&
+ !reference.equals(o.reference)) {
return false;
}
@@ -627,7 +634,6 @@ public class StoreFileInfo {
if (link != o.link && link != null && !link.equals(o.link)) {
return false;
}
-
return true;
}
@@ -672,7 +678,7 @@ public class StoreFileInfo {
}
void initHDFSBlocksDistribution() throws IOException {
- hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
+ hdfsBlocksDistribution = computeHDFSBlocksDistribution();
}
StoreFileReader preStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf)