You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bu...@apache.org on 2018/04/17 17:34:41 UTC
[2/2] hbase git commit: HBASE-20404 Fixes to CleanChore correctness
and operability.
HBASE-20404 Fixes to CleanChore correctness and operability.
* Make CleanerChore less chatty: move WARN message to DEBUG when we expect non-empty dirs
* Make CleanerChore less chatty: move IOE we'll retry to INFO
* CleanerChore should treat IOE for FileStatus as a failure
* Add tests asserting assumptions in above
Signed-off-by: Reid Chan <re...@outlook.com>
Signed-off-by: Mike Drob <md...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/10f74dfb
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/10f74dfb
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/10f74dfb
Branch: refs/heads/branch-2.0
Commit: 10f74dfbdb2e2b28739e3759e0dff429efe2d58f
Parents: c7c0861
Author: Sean Busbey <bu...@apache.org>
Authored: Fri Apr 13 00:57:35 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Tue Apr 17 11:58:47 2018 -0500
----------------------------------------------------------------------
.../hbase/master/cleaner/CleanerChore.java | 27 ++++++++--
.../hbase/master/cleaner/TestCleanerChore.java | 54 ++++++++++++++++++--
.../apache/hadoop/hbase/util/TestFSUtils.java | 10 ++++
3 files changed, 83 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/10f74dfb/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 46f6217..8765a43 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
@@ -391,6 +392,10 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
T act() throws IOException;
}
+ /**
+ * Attempts to clean up a directory, its subdirectories, and files.
+ * Return value is true if everything was deleted. false on partial / total failures.
+ */
private class CleanerTask extends RecursiveTask<Boolean> {
private final Path dir;
private final boolean root;
@@ -410,11 +415,13 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
List<FileStatus> subDirs;
List<FileStatus> files;
try {
+ // if dir doesn't exist, we'll get null back for both of these
+ // which will fall through to succeeding.
subDirs = getFilteredStatus(status -> status.isDirectory());
files = getFilteredStatus(status -> status.isFile());
} catch (IOException ioe) {
- LOG.warn(dir + " doesn't exist, just skip it. ", ioe);
- return true;
+ LOG.warn("failed to get FileStatus for contents of '{}'", dir, ioe);
+ return false;
}
boolean nullSubDirs = subDirs == null;
@@ -452,8 +459,8 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
* Pay attention that FSUtils #listStatusWithStatusFilter would return null,
* even though status is empty but not null.
* @param function a filter function
- * @return filtered FileStatus
- * @throws IOException if there's no such a directory
+ * @return filtered FileStatus or null if dir doesn't exist
+ * @throws IOException if there's an error other than dir not existing
*/
private List<FileStatus> getFilteredStatus(Predicate<FileStatus> function) throws IOException {
return FSUtils.listStatusWithStatusFilter(fs, dir, status -> function.test(status));
@@ -470,8 +477,18 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Schedu
try {
LOG.trace("Start deleting {} under {}", type, dir);
deleted = deletion.act();
+ } catch (PathIsNotEmptyDirectoryException exception) {
+ // N.B. HDFS throws this exception when we try to delete a non-empty directory, but
+ // LocalFileSystem throws a bare IOException. So some test code will get the verbose
+ // message below.
+ LOG.debug("Couldn't delete '{}' yet because it isn't empty. Probably transient. " +
+ "exception details at TRACE.", dir);
+ LOG.trace("Couldn't delete '{}' yet because it isn't empty w/exception.", dir, exception);
+ deleted = false;
} catch (IOException ioe) {
- LOG.warn("Could not delete {} under {}; {}", type, dir, ioe);
+ LOG.info("Could not delete {} under {}. might be transient; we'll retry. if it keeps " +
+ "happening, use following exception when asking on mailing list.",
+ type, dir, ioe);
deleted = false;
}
LOG.trace("Finish deleting {} under {}, deleted=", type, dir, deleted);
http://git-wip-us.apache.org/repos/asf/hbase/blob/10f74dfb/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
index 75557fb..8851ab3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestCleanerChore.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -83,9 +85,55 @@ public class TestCleanerChore {
// run the chore
chore.chore();
- // verify all the files got deleted
- assertTrue("File didn't get deleted", fs.exists(file));
- assertTrue("Empty directory didn't get deleted", fs.exists(parent));
+ // verify all the files were preserved
+ assertTrue("File shouldn't have been deleted", fs.exists(file));
+ assertTrue("directory shouldn't have been deleted", fs.exists(parent));
+ }
+
+ @Test
+ public void retriesIOExceptionInStatus() throws Exception {
+ Stoppable stop = new StoppableImplementation();
+ Configuration conf = UTIL.getConfiguration();
+ Path testDir = UTIL.getDataTestDir();
+ FileSystem fs = UTIL.getTestFileSystem();
+ String confKey = "hbase.test.cleaner.delegates";
+
+ Path child = new Path(testDir, "child");
+ Path file = new Path(child, "file");
+ fs.mkdirs(child);
+ fs.create(file).close();
+ assertTrue("test file didn't get created.", fs.exists(file));
+ final AtomicBoolean fails = new AtomicBoolean(true);
+
+ FilterFileSystem filtered = new FilterFileSystem(fs) {
+ public FileStatus[] listStatus(Path f) throws IOException {
+ if (fails.get()) {
+ throw new IOException("whomp whomp.");
+ }
+ return fs.listStatus(f);
+ }
+ };
+
+ AllValidPaths chore = new AllValidPaths("test-retry-ioe", stop, conf, filtered, testDir, confKey);
+
+ // trouble talking to the filesystem
+ Boolean result = chore.runCleaner();
+
+ // verify that it couldn't clean the files.
+ assertTrue("test rig failed to inject failure.", fs.exists(file));
+ assertTrue("test rig failed to inject failure.", fs.exists(child));
+ // and verify that it accurately reported the failure.
+ assertFalse("chore should report that it failed.", result);
+
+ // filesystem is back
+ fails.set(false);
+ result = chore.runCleaner();
+
+ // verify everything is gone.
+ assertFalse("file should have been destroyed.", fs.exists(file));
+ assertFalse("directory should have been destroyed.", fs.exists(child));
+ // and verify that it accurately reported success.
+ assertTrue("chore should claim it succeeded.", result);
}
@Test
http://git-wip-us.apache.org/repos/asf/hbase/blob/10f74dfb/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
index edd35c7..2718120 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestFSUtils.java
@@ -289,6 +289,16 @@ public class TestFSUtils {
}
}
+ @Test
+ public void testFilteredStatusDoesNotThrowOnNotFound() throws Exception {
+ MiniDFSCluster cluster = htu.startMiniDFSCluster(1);
+ try {
+ assertNull(FSUtils.listStatusWithStatusFilter(cluster.getFileSystem(), new Path("definitely/doesn't/exist"), null));
+ } finally {
+ cluster.shutdown();
+ }
+
+ }
@Test
public void testRenameAndSetModifyTime() throws Exception {