You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ps...@apache.org on 2023/02/09 14:21:25 UTC

[hbase] branch branch-2.5 updated: HBASE-27590 Change Iterable to List in SnapshotFileCache (#4995)

This is an automated email from the ASF dual-hosted git repository.

psomogyi pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.5 by this push:
     new d48e7af6cb0 HBASE-27590 Change Iterable to List in SnapshotFileCache (#4995)
d48e7af6cb0 is described below

commit d48e7af6cb093a88af253b1633d380c059d5c0f5
Author: Peter Somogyi <ps...@apache.org>
AuthorDate: Thu Feb 9 11:20:35 2023 +0100

    HBASE-27590 Change Iterable to List in SnapshotFileCache (#4995)
    
    Signed-off-by: Duo Zhang <zh...@apache.org>
    (cherry picked from commit d2c5af11ea46c86c592751608e44f563406aa4f3)
---
 .../hadoop/hbase/master/snapshot/SnapshotFileCache.java      |  8 ++------
 .../hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java   | 12 +++++++++++-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
index e9a33f6cb28..da3da42f35a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
@@ -178,15 +178,11 @@ public class SnapshotFileCache implements Stoppable {
    * at that point, cache will still think the file system contains that file and return
    * <tt>true</tt>, even if it is no longer present (false positive). However, if the file never was
    * on the filesystem, we will never find it and always return <tt>false</tt>.
-   * @param files file to check, NOTE: Relies that files are loaded from hdfs before method is
-   *              called (NOT LAZY)
+   * @param files files to check
    * @return <tt>unReferencedFiles</tt> the collection of files that do not have snapshot references
    * @throws IOException if there is an unexpected error reaching the filesystem.
    */
-  // XXX this is inefficient to synchronize on the method, when what we really need to guard against
-  // is an illegal access to the cache. Really we could do a mutex-guarded pointer swap on the
-  // cache, but that seems overkill at the moment and isn't necessarily a bottleneck.
-  public Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatus> files,
+  public Iterable<FileStatus> getUnreferencedFiles(List<FileStatus> files,
     final SnapshotManager snapshotManager) throws IOException {
     List<FileStatus> unReferencedFiles = Lists.newArrayList();
     List<String> snapshotsInProgress = null;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java
index a300cbbce68..bcd129afc21 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java
@@ -20,7 +20,10 @@ package org.apache.hadoop.hbase.master.snapshot;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -64,8 +67,15 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate {
 
   @Override
   public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
+    // The Iterable is lazily evaluated, so if we just pass it in, we will access the HFile
+    // storage while holding the snapshot lock, which could take a long time (for example,
+    // several seconds) and block all other operations, especially other cleaners.
+    // So we convert it to a List first, forcing evaluation before calling
+    // getUnreferencedFiles, so that we do not hold the snapshot lock for a long time.
+    List<FileStatus> filesList =
+      StreamSupport.stream(files.spliterator(), false).collect(Collectors.toList());
     try {
-      return cache.getUnreferencedFiles(files, master.getSnapshotManager());
+      return cache.getUnreferencedFiles(filesList, master.getSnapshotManager());
     } catch (CorruptedSnapshotException cse) {
       LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse);
     } catch (IOException e) {