You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bu...@apache.org on 2018/04/17 17:34:40 UTC

[1/2] hbase git commit: HBASE-20404 Fixes to CleanChore correctness and operability.

Repository: hbase
Updated Branches:
  refs/heads/branch-2 d951675df -> 1c8d9d788
  refs/heads/branch-2.0 c7c0861c9 -> 10f74dfbd


HBASE-20404 Fixes to CleanChore correctness and operability.

* Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs
* Make CleanerChore less chatty: move IOE we'll retry to INFO
* CleanerChore should treat IOE for FileStatus as a failure
* Add tests asserting assumptions in above

Signed-off-by: Reid Chan <re...@outlook.com>
Signed-off-by: Mike Drob <md...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1c8d9d78
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1c8d9d78
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1c8d9d78

Branch: refs/heads/branch-2
Commit: 1c8d9d788f3e39bf9406d11e590fd64781cd73de
Parents: d951675
Author: Sean Busbey <bu...@apache.org>
Authored: Fri Apr 13 00:57:35 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Tue Apr 17 11:53:19 2018 -0500

----------------------------------------------------------------------
 .../hbase/master/cleaner/CleanerChore.java      | 27 ++++++++--
 .../hbase/master/cleaner/TestCleanerChore.java  | 54 ++++++++++++++++++--
 .../apache/hadoop/hbase/util/TestFSUtils.java   | 10 ++++
 3 files changed, 83 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/1c8d9d78/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 396fbaf..dc13645 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
@@ -446,6 +447,10 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
     T act() throws IOException;
   }
 
+  /**
+   * Attempts to clean up a directory, its subdirectories, and files.
+   * Return value is true if everything was deleted. false on partial / total failures.
+   */
   private class CleanerTask extends RecursiveTask<Boolean> {
     private final Path dir;
     private final boolean root;
@@ -465,11 +470,13 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       List<FileStatus> subDirs;
       List<FileStatus> files;
       try {
+        // if dir doesn't exist, we'll get null back for both of these
+        // which will fall through to succeeding.
         subDirs = getFilteredStatus(status -> status.isDirectory());
         files = getFilteredStatus(status -> status.isFile());
       } catch (IOException ioe) {
-        LOG.warn(dir + " doesn't exist, just skip it. ", ioe);
-        return true;
+        LOG.warn("failed to get FileStatus for contents of '{}'", dir, ioe);
+        return false;
       }
 
       boolean nullSubDirs = subDirs == null;
@@ -507,8 +514,8 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
      * Pay attention that FSUtils #listStatusWithStatusFilter would return null,
      * even though status is empty but not null.
      * @param function a filter function
-     * @return filtered FileStatus
-     * @throws IOException if there's no such a directory
+     * @return filtered FileStatus or null if dir doesn't exist
+     * @throws IOException if there's an error other than dir not existing
      */
     private List<FileStatus> getFilteredStatus(Predicate<FileStatus> function) throws IOException {
       return FSUtils.listStatusWithStatusFilter(fs, dir, status -> function.test(status));
@@ -525,8 +532,18 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       try {
         LOG.trace("Start deleting {} under {}", type, dir);
         deleted = deletion.act();
+      } catch (PathIsNotEmptyDirectoryException exception) {
+        // N.B. HDFS throws this exception when we try to delete a non-empty directory, but
+        // LocalFileSystem throws a bare IOException. So some test code will get the verbose
+        // message below.
+        LOG.debug("Couldn't delete '{}' yet because it isn't empty. Probably transient. " +
+            "exception details at TRACE.", dir);
+        LOG.trace("Couldn't delete '{}' yet because it isn't empty w/exception.", dir, exception);
+        deleted = false;
       } catch (IOException ioe) {
-        LOG.warn("Could not delete {} under {}; {}", type, dir, ioe);
+        LOG.info("Could not delete {} under {}. might be transient; we'll retry. if it keeps " +
+                  "happening, use following exception when asking on mailing list.",
+                  type, dir, ioe);
         deleted = false;
       }
       LOG.trace("Finish deleting {} under {}, deleted=", type, dir, deleted);

http://git-wip-us.apache.org/repos/asf/hbase/blob/1c8d9d78/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
index 5736663..c977f98 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -89,9 +91,55 @@ public class TestCleanerChore {
     // run the chore
     chore.chore();
 
-    // verify all the files got deleted
-    assertTrue("File didn't get deleted", fs.exists(file));
-    assertTrue("Empty directory didn't get deleted", fs.exists(parent));
+    // verify all the files were preserved
+    assertTrue("File shouldn't have been deleted", fs.exists(file));
+    assertTrue("directory shouldn't have been deleted", fs.exists(parent));
+  }
+
+  @Test
+  public void retriesIOExceptionInStatus() throws Exception {
+    Stoppable stop = new StoppableImplementation();
+    Configuration conf = UTIL.getConfiguration();
+    Path testDir = UTIL.getDataTestDir();
+    FileSystem fs = UTIL.getTestFileSystem();
+    String confKey = "hbase.test.cleaner.delegates";
+
+    Path child = new Path(testDir, "child");
+    Path file = new Path(child, "file");
+    fs.mkdirs(child);
+    fs.create(file).close();
+    assertTrue("test file didn't get created.", fs.exists(file));
+    final AtomicBoolean fails = new AtomicBoolean(true);
+
+    FilterFileSystem filtered = new FilterFileSystem(fs) {
+      public FileStatus[] listStatus(Path f) throws IOException {
+        if (fails.get()) {
+          throw new IOException("whomp whomp.");
+        }
+        return fs.listStatus(f);
+      }
+    };
+
+    AllValidPaths chore = new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey);
+
+    // trouble talking to the filesystem
+    Boolean result = chore.runCleaner();
+
+    // verify that it couldn't clean the files.
+    assertTrue("test rig failed to inject failure.", fs.exists(file));
+    assertTrue("test rig failed to inject failure.", fs.exists(child));
+    // and verify that it accurately reported the failure.
+    assertFalse("chore should report that it failed.", result);
+
+    // filesystem is back
+    fails.set(false);
+    result = chore.runCleaner();
+
+    // verify everything is gone.
+    assertFalse("file should have been destroyed.", fs.exists(file));
+    assertFalse("directory should have been destroyed.", fs.exists(child));
+    // and verify that it accurately reported success.
+    assertTrue("chore should claim it succeeded.", result);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hbase/blob/1c8d9d78/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
index edd35c7..2718120 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
@@ -289,6 +289,16 @@ public class TestFSUtils {
     }
   }
 
+  @Test
+  public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception {
+    MiniDFSCluster cluster = htu.startMiniDFSCluster(1);
+    try {
+      assertNull(FSUtils.listStatusWithStatusFilter(cluster.getFileSystem(), new Path("definitely/doesn't/exist"), null));
+    } finally {
+      cluster.shutdown();
+    }
+
+  }
 
   @Test
   public void testRenameAndSetModifyTime() throws Exception {


[2/2] hbase git commit: HBASE-20404 Fixes to CleanChore correctness and operability.

Posted by bu...@apache.org.
HBASE-20404 Fixes to CleanChore correctness and operability.

* Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs
* Make CleanerChore less chatty: move IOE we'll retry to INFO
* CleanerChore should treat IOE for FileStatus as a failure
* Add tests asserting assumptions in above

Signed-off-by: Reid Chan <re...@outlook.com>
Signed-off-by: Mike Drob <md...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/10f74dfb
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/10f74dfb
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/10f74dfb

Branch: refs/heads/branch-2.0
Commit: 10f74dfbdb2e2b28739e3759e0dff429efe2d58f
Parents: c7c0861
Author: Sean Busbey <bu...@apache.org>
Authored: Fri Apr 13 00:57:35 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Tue Apr 17 11:58:47 2018 -0500

----------------------------------------------------------------------
 .../hbase/master/cleaner/CleanerChore.java      | 27 ++++++++--
 .../hbase/master/cleaner/TestCleanerChore.java  | 54 ++++++++++++++++++--
 .../apache/hadoop/hbase/util/TestFSUtils.java   | 10 ++++
 3 files changed, 83 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/10f74dfb/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 46f6217..8765a43 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.hbase.ScheduledChore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
@@ -391,6 +392,10 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
     T act() throws IOException;
   }
 
+  /**
+   * Attempts to clean up a directory, its subdirectories, and files.
+   * Return value is true if everything was deleted. false on partial / total failures.
+   */
   private class CleanerTask extends RecursiveTask<Boolean> {
     private final Path dir;
     private final boolean root;
@@ -410,11 +415,13 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       List<FileStatus> subDirs;
       List<FileStatus> files;
       try {
+        // if dir doesn't exist, we'll get null back for both of these
+        // which will fall through to succeeding.
         subDirs = getFilteredStatus(status -> status.isDirectory());
         files = getFilteredStatus(status -> status.isFile());
       } catch (IOException ioe) {
-        LOG.warn(dir + " doesn't exist, just skip it. ", ioe);
-        return true;
+        LOG.warn("failed to get FileStatus for contents of '{}'", dir, ioe);
+        return false;
       }
 
       boolean nullSubDirs = subDirs == null;
@@ -452,8 +459,8 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
      * Pay attention that FSUtils #listStatusWithStatusFilter would return null,
      * even though status is empty but not null.
      * @param function a filter function
-     * @return filtered FileStatus
-     * @throws IOException if there's no such a directory
+     * @return filtered FileStatus or null if dir doesn't exist
+     * @throws IOException if there's an error other than dir not existing
      */
     private List<FileStatus> getFilteredStatus(Predicate<FileStatus> function) throws IOException {
       return FSUtils.listStatusWithStatusFilter(fs, dir, status -> function.test(status));
@@ -470,8 +477,18 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
       try {
         LOG.trace("Start deleting {} under {}", type, dir);
         deleted = deletion.act();
+      } catch (PathIsNotEmptyDirectoryException exception) {
+        // N.B. HDFS throws this exception when we try to delete a non-empty directory, but
+        // LocalFileSystem throws a bare IOException. So some test code will get the verbose
+        // message below.
+        LOG.debug("Couldn't delete '{}' yet because it isn't empty. Probably transient. " +
+            "exception details at TRACE.", dir);
+        LOG.trace("Couldn't delete '{}' yet because it isn't empty w/exception.", dir, exception);
+        deleted = false;
       } catch (IOException ioe) {
-        LOG.warn("Could not delete {} under {}; {}", type, dir, ioe);
+        LOG.info("Could not delete {} under {}. might be transient; we'll retry. if it keeps " +
+                  "happening, use following exception when asking on mailing list.",
+                  type, dir, ioe);
         deleted = false;
       }
       LOG.trace("Finish deleting {} under {}, deleted=", type, dir, deleted);

http://git-wip-us.apache.org/repos/asf/hbase/blob/10f74dfb/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
index 75557fb..8851ab3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -83,9 +85,55 @@ public class TestCleanerChore {
     // run the chore
     chore.chore();
 
-    // verify all the files got deleted
-    assertTrue("File didn't get deleted", fs.exists(file));
-    assertTrue("Empty directory didn't get deleted", fs.exists(parent));
+    // verify all the files were preserved
+    assertTrue("File shouldn't have been deleted", fs.exists(file));
+    assertTrue("directory shouldn't have been deleted", fs.exists(parent));
+  }
+
+  @Test
+  public void retriesIOExceptionInStatus() throws Exception {
+    Stoppable stop = new StoppableImplementation();
+    Configuration conf = UTIL.getConfiguration();
+    Path testDir = UTIL.getDataTestDir();
+    FileSystem fs = UTIL.getTestFileSystem();
+    String confKey = "hbase.test.cleaner.delegates";
+
+    Path child = new Path(testDir, "child");
+    Path file = new Path(child, "file");
+    fs.mkdirs(child);
+    fs.create(file).close();
+    assertTrue("test file didn't get created.", fs.exists(file));
+    final AtomicBoolean fails = new AtomicBoolean(true);
+
+    FilterFileSystem filtered = new FilterFileSystem(fs) {
+      public FileStatus[] listStatus(Path f) throws IOException {
+        if (fails.get()) {
+          throw new IOException("whomp whomp.");
+        }
+        return fs.listStatus(f);
+      }
+    };
+
+    AllValidPaths chore = new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey);
+
+    // trouble talking to the filesystem
+    Boolean result = chore.runCleaner();
+
+    // verify that it couldn't clean the files.
+    assertTrue("test rig failed to inject failure.", fs.exists(file));
+    assertTrue("test rig failed to inject failure.", fs.exists(child));
+    // and verify that it accurately reported the failure.
+    assertFalse("chore should report that it failed.", result);
+
+    // filesystem is back
+    fails.set(false);
+    result = chore.runCleaner();
+
+    // verify everything is gone.
+    assertFalse("file should have been destroyed.", fs.exists(file));
+    assertFalse("directory should have been destroyed.", fs.exists(child));
+    // and verify that it accurately reported success.
+    assertTrue("chore should claim it succeeded.", result);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hbase/blob/10f74dfb/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
index edd35c7..2718120 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
@@ -289,6 +289,16 @@ public class TestFSUtils {
     }
   }
 
+  @Test
+  public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception {
+    MiniDFSCluster cluster = htu.startMiniDFSCluster(1);
+    try {
+      assertNull(FSUtils.listStatusWithStatusFilter(cluster.getFileSystem(), new Path("definitely/doesn't/exist"), null));
+    } finally {
+      cluster.shutdown();
+    }
+
+  }
 
   @Test
   public void testRenameAndSetModifyTime() throws Exception {