You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2021/08/31 18:21:22 UTC

[hbase] 01/01: HBASE-23584 cache filestatus in storefileinfo

This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch HBASE-23584.master
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit b9a425b73b7a5fb654ee14c2425cf1b8b49640f1
Author: yuhuiyang <yu...@qianxin.com>
AuthorDate: Tue Aug 31 10:47:16 2021 -0700

    HBASE-23584 cache filestatus in storefileinfo
---
 .../apache/hadoop/hbase/regionserver/HRegion.java  |   3 +-
 .../hadoop/hbase/regionserver/StoreFileInfo.java   | 246 +++++++++++----------
 2 files changed, 127 insertions(+), 122 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 22e3901..4837528 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -1291,8 +1291,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
           // Only construct StoreFileInfo object if its not a hfile, save obj
           // creation
           StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, status);
-          hdfsBlocksDistribution.add(storeFileInfo
-              .computeHDFSBlocksDistribution(fs));
+          hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution());
         } else if (StoreFileInfo.isHFile(p)) {
           // If its a HFile, then lets just add to the block distribution
           // lets not create more objects here, not even another HDFSBlocksDistribution
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
index 5eaab23..c2f5700 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java
@@ -1,5 +1,4 @@
-/**
- *
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -18,13 +17,11 @@
  */
 
 package org.apache.hadoop.hbase.regionserver;
-
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -46,7 +43,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Describe a StoreFile (hfile, reference, link)
+ * StoreFile info.
+ * The info could be for a plain storefile/hfile or it could be a reference or link to a storefile.
  */
 @InterfaceAudience.Private
 public class StoreFileInfo {
@@ -112,6 +110,16 @@ public class StoreFileInfo {
   final AtomicInteger refCount = new AtomicInteger(0);
 
   /**
+   * Cached fileStatus.
+   * Used selectively. Cache the FileStatus if this StoreFileInfo is for a plain StoreFile.
+   * Save on having to go to the filesystem every time (costly). We cannot cache FileStatus if this
+   * StoreFileInfo is for a link or for a reference that might in turn be to a link. The file behind
+   * a link can move during the lifetime of this StoreFileInfo invalidating what we might have
+   * cached here; do a lookup of FileStatus every time when a link to be safe.
+   */
+  private FileStatus cachedFileStatus = null;
+
+  /**
    * Create a Store File Info
    * @param conf the {@link Configuration} to use
    * @param fs The current file system to use.
@@ -135,37 +143,32 @@ public class StoreFileInfo {
     this.primaryReplica = primaryReplica;
     this.noReadahead = this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD,
       DEFAULT_STORE_FILE_READER_NO_READAHEAD);
-    Path p = initialPath;
-    if (HFileLink.isHFileLink(p)) {
-      // HFileLink
+    if (HFileLink.isHFileLink(initialPath)) {
       this.reference = null;
-      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
-      LOG.trace("{} is a link", p);
-    } else if (isReference(p)) {
-      this.reference = Reference.read(fs, p);
-      Path referencePath = getReferredToFile(p);
-      if (HFileLink.isHFileLink(referencePath)) {
-        // HFileLink Reference
-        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
-      } else {
-        // Reference
-        this.link = null;
-      }
-      LOG.trace("{} is a {} reference to {}", p, reference.getFileRegion(), referencePath);
-    } else if (isHFile(p) || isMobFile(p) || isMobRefFile(p)) {
-      // HFile
-      if (fileStatus != null) {
-        this.createdTimestamp = fileStatus.getModificationTime();
-        this.size = fileStatus.getLen();
-      } else {
-        FileStatus fStatus = fs.getFileStatus(initialPath);
-        this.createdTimestamp = fStatus.getModificationTime();
-        this.size = fStatus.getLen();
-      }
+      this.link = HFileLink.buildFromHFileLinkPattern(conf, initialPath);
+      LOG.trace("{} is a link", initialPath);
+    } else if (isReference(initialPath)) {
+      this.reference = Reference.read(fs, initialPath);
+      Path referencedPath = getReferredToFile(initialPath);
+      // Check if the referenced file is a link.
+      this.link = HFileLink.isHFileLink(referencedPath)?
+        HFileLink.buildFromHFileLinkPattern(conf, referencedPath): null;
+      LOG.trace("{} is a {} reference to {} (link={})",
+        initialPath, reference.getFileRegion(), referencedPath, link == null);
+    } else if (isHFile(initialPath) || isMobFile(initialPath) || isMobRefFile(initialPath)) {
+      // Safe to cache passed filestatus when NOT a link or reference; i.e. when it filestatus on
+      // a plain storefile/hfile.
+      assert fileStatus != null && fileStatus.getPath().equals(initialPath);
+      this.cachedFileStatus = fileStatus != null?
+        fileStatus: this.fs.getFileStatus(this.initialPath);
+      this.createdTimestamp = this.cachedFileStatus.getModificationTime();
+      this.size = this.cachedFileStatus.getLen();
       this.reference = null;
       this.link = null;
+      LOG.trace("{}", initialPath);
     } else {
-      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
+      throw new IOException("Path=" + initialPath + " doesn't look like a valid StoreFile; " +
+        "it is not a link, a reference or an hfile.");
     }
   }
 
@@ -213,6 +216,8 @@ public class StoreFileInfo {
    */
   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
       final Reference reference, final HFileLink link) {
+    assert fs != null;
+    assert conf != null;
     this.fs = fs;
     this.conf = conf;
     this.primaryReplica = false;
@@ -234,7 +239,6 @@ public class StoreFileInfo {
 
   /**
    * Sets the region coprocessor env.
-   * @param coprocessorHost
    */
   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
     this.coprocessorHost = coprocessorHost;
@@ -270,13 +274,9 @@ public class StoreFileInfo {
   }
 
   StoreFileReader createReader(ReaderContext context, CacheConfig cacheConf) throws IOException {
-    StoreFileReader reader = null;
-    if (this.reference != null) {
-      reader = new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, refCount, conf);
-    } else {
-      reader = new StoreFileReader(context, hfileInfo, cacheConf, refCount, conf);
-    }
-    return reader;
+    return this.reference != null?
+      new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, refCount, conf):
+      new StoreFileReader(context, hfileInfo, cacheConf, refCount, conf);
   }
 
   ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type)
@@ -293,22 +293,21 @@ public class StoreFileInfo {
       try {
         in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
       } catch (FileNotFoundException fnfe) {
-        // Intercept the exception so can insert more info about the Reference; otherwise
-        // exception just complains about some random file -- operator doesn't realize it
-        // other end of a Reference
-        FileNotFoundException newFnfe = new FileNotFoundException(toString());
-        newFnfe.initCause(fnfe);
-        throw newFnfe;
+        throw decorateFileNotFoundException(fnfe);
       }
       status = fs.getFileStatus(referencePath);
     } else {
       in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
-      status = fs.getFileStatus(initialPath);
+      if (this.cachedFileStatus == null) {
+        // Safe to cache filestatus for a plain storefile/hfile.
+        this.cachedFileStatus = this.fs.getFileStatus(this.initialPath);
+      }
+      status = this.cachedFileStatus;
     }
     long length = status.getLen();
-    ReaderContextBuilder contextBuilder =
-        new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length)
-            .withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).withFileSystem(fs);
+    ReaderContextBuilder contextBuilder = new ReaderContextBuilder().withInputStreamWrapper(in).
+      withFileSize(length).withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).
+      withFileSystem(fs);
     if (this.reference != null) {
       contextBuilder.withFilePath(this.getPath());
     } else {
@@ -320,76 +319,76 @@ public class StoreFileInfo {
   /**
    * Compute the HDFS Block Distribution for this StoreFile
    */
-  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
-      throws IOException {
-    // guard against the case where we get the FileStatus from link, but by the time we
-    // call compute the file is moved again
+  public HDFSBlocksDistribution computeHDFSBlocksDistribution() throws IOException {
     if (this.link != null) {
+      // Guard against case where file behind link has moved when we go to calculate distribution;
+      // e.g. from data dir to archive dir. Retry up to number of locations under the link.
       FileNotFoundException exToThrow = null;
       for (int i = 0; i < this.link.getLocations().length; i++) {
         try {
-          return computeHDFSBlocksDistributionInternal(fs);
-        } catch (FileNotFoundException ex) {
-          // try the other location
-          exToThrow = ex;
+          return computeHDFSBlocksDistributionInternal();
+        } catch (FileNotFoundException fnfe) {
+          // Try the other locations -- file behind link may have moved.
+          exToThrow = fnfe;
         }
       }
-      throw exToThrow;
+      throw decorateFileNotFoundException(exToThrow);
     } else {
-      return computeHDFSBlocksDistributionInternal(fs);
+      return computeHDFSBlocksDistributionInternal();
     }
   }
 
-  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
-      throws IOException {
-    FileStatus status = getReferencedFileStatus(fs);
+  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal() throws IOException {
+    FileStatus status = getFileStatus();
     if (this.reference != null) {
-      return computeRefFileHDFSBlockDistribution(fs, reference, status);
+      return computeRefFileHDFSBlockDistribution(status);
     } else {
-      return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
+      return FSUtils.computeHDFSBlocksDistribution(this.fs, status, 0, status.getLen());
     }
   }
 
   /**
-   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
-   * @param fs The current file system to use.
+   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo on the passed
+   * <code>fs</code>.
+   * This {@link StoreFileInfo} could be for a link or a reference or a plain hfile/storefile; get
+   * the filestatus for whatever the link or reference points to (or just the plain hfile/storefile
+   * if not a link/reference). This method is called by snapshot. Filesystem may not be same as
+   * that of this hosting {@link StoreFileInfo}. If a link, when you go to use the passed
+   * FileStatus, the file may have moved; e.g. from data to archive... be aware.
+   * @param fs The filesystem to use.
    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
    */
   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
+    if (this.fs.equals(fs) && this.cachedFileStatus != null) {
+      return this.cachedFileStatus;
+    }
     FileStatus status;
     if (this.reference != null) {
       if (this.link != null) {
-        FileNotFoundException exToThrow = null;
-        for (int i = 0; i < this.link.getLocations().length; i++) {
-          // HFileLink Reference
-          try {
-            return link.getFileStatus(fs);
-          } catch (FileNotFoundException ex) {
-            // try the other location
-            exToThrow = ex;
-          }
-        }
-        throw exToThrow;
+        status = this.link.getFileStatus(this.fs);
       } else {
-        // HFile Reference
-        Path referencePath = getReferredToFile(this.getPath());
-        status = fs.getFileStatus(referencePath);
+        try {
+          Path referencePath = getReferredToFile(this.getPath());
+          status = this.fs.getFileStatus(referencePath);
+        } catch (FileNotFoundException ex) {
+          throw decorateFileNotFoundException(ex);
+        }
       }
     } else {
-      if (this.link != null) {
-        FileNotFoundException exToThrow = null;
-        for (int i = 0; i < this.link.getLocations().length; i++) {
-          // HFileLink
-          try {
-            return link.getFileStatus(fs);
-          } catch (FileNotFoundException ex) {
-            // try the other location
-            exToThrow = ex;
+      try {
+        if (this.link != null) {
+          status = this.link.getFileStatus(this.fs);
+        } else {
+          status = this.fs.getFileStatus(this.initialPath);
+          if (this.fs.equals(fs)) {
+            // If the passed in filestatus is same as that of this StoreFileStatus, take this
+            // opportunity to cache the filestatus. It is safe to cache passed filestatus when NOT a
+            // link or reference; i.e. when it filestatus on a plain storefile/hfile.
+            this.cachedFileStatus = status;
           }
         }
-        throw exToThrow;
-      } else {
-        status = fs.getFileStatus(initialPath);
+      } catch (FileNotFoundException fnfe) {
+        throw decorateFileNotFoundException(fnfe);
       }
     }
     return status;
@@ -400,9 +399,22 @@ public class StoreFileInfo {
     return initialPath;
   }
 
-  /** @return The {@link FileStatus} of the file */
+  /**
+   * @return A new FNFE with <param>fnfe</param> as cause but including info if reference or link.
+   */
+  private FileNotFoundException decorateFileNotFoundException(FileNotFoundException fnfe) {
+    FileNotFoundException newFnfe = new FileNotFoundException(toString());
+    newFnfe.initCause(fnfe);
+    return newFnfe;
+  }
+
+  /**
+   * @return {@link FileStatus} for the linked or referenced file or if not a link/reference, then
+   *   the FileStatus for the plain storefile (Be aware, if a link, the file may have moved
+   *   by the time you go to use the FileStatus).
+   */
   public FileStatus getFileStatus() throws IOException {
-    return getReferencedFileStatus(fs);
+    return getReferencedFileStatus(this.fs);
   }
 
   /** @return Get the modification time of the file. */
@@ -412,8 +424,8 @@ public class StoreFileInfo {
 
   @Override
   public String toString() {
-    return this.getPath()
-        + (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
+    return isLink()? this.link.toString(): this.getPath() +
+      (isReference()? "->" + getReferredToFile(this.getPath()) + "-" + reference: "");
   }
 
   /**
@@ -523,10 +535,7 @@ public class StoreFileInfo {
    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
    */
   public static boolean validateStoreFileName(final String fileName) {
-    if (HFileLink.isHFileLink(fileName) || isReference(fileName)) {
-      return true;
-    }
-    return !fileName.contains("-");
+    return (HFileLink.isHFileLink(fileName) || isReference(fileName)) || !fileName.contains("-");
   }
 
   /**
@@ -553,29 +562,27 @@ public class StoreFileInfo {
   }
 
   /**
-   * helper function to compute HDFS blocks distribution of a given reference file.For reference
-   * file, we don't compute the exact value. We use some estimate instead given it might be good
-   * enough. we assume bottom part takes the first half of reference file, top part takes the second
+   * Helper function to compute HDFS blocks distribution of a given reference file. For reference
+   * file, we don't compute the exact value. We use an estimate instead presuming it good enough.
+   * We assume bottom part takes the first half of a reference file, top part takes the second
    * half of the reference file. This is just estimate, given midkey ofregion != midkey of HFile,
    * also the number and size of keys vary. If this estimate isn't good enough, we can improve it
    * later.
-   * @param fs The FileSystem
-   * @param reference The reference
-   * @param status The reference FileStatus
+   * @param status  The reference FileStatus
    * @return HDFS blocks distribution
    */
-  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs,
-      final Reference reference, final FileStatus status) throws IOException {
+  private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileStatus status)
+      throws IOException {
     if (status == null) {
       return null;
     }
 
-    long start = 0;
-    long length = 0;
+    long start;
+    long length;
 
-    if (Reference.isTopFileRegion(reference.getFileRegion())) {
+    if (Reference.isTopFileRegion(this.reference.getFileRegion())) {
       start = status.getLen() / 2;
-      length = status.getLen() - status.getLen() / 2;
+      length = status.getLen() - (status.getLen() / 2);
     } else {
       start = 0;
       length = status.getLen() / 2;
@@ -596,15 +603,15 @@ public class StoreFileInfo {
       return false;
     }
 
-    StoreFileInfo o = (StoreFileInfo) that;
+    StoreFileInfo o = (StoreFileInfo)that;
     if (initialPath != null && o.initialPath == null) {
       return false;
     }
     if (initialPath == null && o.initialPath != null) {
       return false;
     }
-    if (initialPath != o.initialPath && initialPath != null
-        && !initialPath.equals(o.initialPath)) {
+    if (initialPath != o.initialPath && initialPath != null &&
+        !initialPath.equals(o.initialPath)) {
       return false;
     }
     if (reference != null && o.reference == null) {
@@ -613,8 +620,8 @@ public class StoreFileInfo {
     if (reference == null && o.reference != null) {
       return false;
     }
-    if (reference != o.reference && reference != null
-        && !reference.equals(o.reference)) {
+    if (reference != o.reference && reference != null &&
+        !reference.equals(o.reference)) {
       return false;
     }
 
@@ -627,7 +634,6 @@ public class StoreFileInfo {
     if (link != o.link && link != null && !link.equals(o.link)) {
       return false;
     }
-
     return true;
   }
 
@@ -672,7 +678,7 @@ public class StoreFileInfo {
   }
 
   void initHDFSBlocksDistribution() throws IOException {
-    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
+    hdfsBlocksDistribution = computeHDFSBlocksDistribution();
   }
 
   StoreFileReader preStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf)