You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2015/07/14 02:29:11 UTC

[10/11] hadoop git commit: HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move progress. Contributed by Surendra Singh Lilhore

HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move progress.  Contributed by Surendra Singh Lilhore


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9ef03a4c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9ef03a4c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9ef03a4c

Branch: refs/heads/YARN-1197
Commit: 9ef03a4c5bb5573eadc7d04e371c4af2dc6bae37
Parents: f7c8311
Author: Tsz-Wo Nicholas Sze <sz...@hortonworks.com>
Authored: Mon Jul 13 15:12:26 2015 -0700
Committer: Tsz-Wo Nicholas Sze <sz...@hortonworks.com>
Committed: Mon Jul 13 15:12:26 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +++
 .../hadoop/hdfs/server/balancer/Dispatcher.java | 18 +++++++++++++
 .../apache/hadoop/hdfs/server/mover/Mover.java  | 27 +++++++++++++++-----
 .../hadoop/hdfs/server/mover/TestMover.java     |  2 +-
 4 files changed, 43 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 1491990..e843dcc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -716,6 +716,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-8751. Remove setBlocks API from INodeFile and misc code cleanup. (Zhe
     Zhang via jing9)
 
+    HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move
+    progress.  (Surendra Singh Lilhore via szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
index 4a8f40f..298b86d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
@@ -317,6 +317,7 @@ public class Dispatcher {
         sendRequest(out, eb, accessToken);
         receiveResponse(in);
         nnc.getBytesMoved().addAndGet(block.getNumBytes());
+        target.getDDatanode().setHasSuccess();
         LOG.info("Successfully moved " + this);
       } catch (IOException e) {
         LOG.warn("Failed to move " + this + ": " + e.getMessage());
@@ -500,6 +501,7 @@ public class Dispatcher {
     /** blocks being moved but not confirmed yet */
     private final List<PendingMove> pendings;
     private volatile boolean hasFailure = false;
+    private volatile boolean hasSuccess = false;
     private final int maxConcurrentMoves;
 
     @Override
@@ -573,6 +575,10 @@ public class Dispatcher {
     void setHasFailure() {
       this.hasFailure = true;
     }
+
+    void setHasSuccess() {
+      this.hasSuccess = true;
+    }
   }
 
   /** A node that can be the sources of a block move */
@@ -965,6 +971,18 @@ public class Dispatcher {
   }
 
   /**
+   * @return true if some moves are success.
+   */
+  public static boolean checkForSuccess(
+      Iterable<? extends StorageGroup> targets) {
+    boolean hasSuccess = false;
+    for (StorageGroup t : targets) {
+      hasSuccess |= t.getDDatanode().hasSuccess;
+    }
+    return hasSuccess;
+  }
+
+  /**
    * Decide if the block is a good candidate to be moved from source to target.
    * A block is a good candidate if
    * 1. the block is not in the process of being moved/has not been moved;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
index 344b9fc..afacebb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
@@ -269,10 +269,14 @@ public class Mover {
       // wait for pending move to finish and retry the failed migration
       boolean hasFailed = Dispatcher.waitForMoveCompletion(storages.targets
           .values());
-      if (hasFailed) {
+      boolean hasSuccess = Dispatcher.checkForSuccess(storages.targets
+          .values());
+      if (hasFailed && !hasSuccess) {
         if (retryCount.get() == retryMaxAttempts) {
-          throw new IOException("Failed to move some block's after "
+          result.setRetryFailed();
+          LOG.error("Failed to move some block's after "
               + retryMaxAttempts + " retries.");
+          return result;
         } else {
           retryCount.incrementAndGet();
         }
@@ -713,10 +717,12 @@ public class Mover {
 
     private boolean hasRemaining;
     private boolean noBlockMoved;
+    private boolean retryFailed;
 
     Result() {
       hasRemaining = false;
       noBlockMoved = true;
+      retryFailed = false;
     }
 
     boolean isHasRemaining() {
@@ -735,16 +741,25 @@ public class Mover {
       this.noBlockMoved = noBlockMoved;
     }
 
+    void setRetryFailed() {
+      this.retryFailed = true;
+    }
+
     /**
-     * @return SUCCESS if all moves are success and there is no remaining move.
+     * @return NO_MOVE_PROGRESS if no progress in move after some retry. Return
+     *         SUCCESS if all moves are success and there is no remaining move.
      *         Return NO_MOVE_BLOCK if there moves available but all the moves
      *         cannot be scheduled. Otherwise, return IN_PROGRESS since there
      *         must be some remaining moves.
      */
     ExitStatus getExitStatus() {
-      return !isHasRemaining() ? ExitStatus.SUCCESS
-          : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK
-              : ExitStatus.IN_PROGRESS;
+      if (retryFailed) {
+        return ExitStatus.NO_MOVE_PROGRESS;
+      } else {
+        return !isHasRemaining() ? ExitStatus.SUCCESS
+            : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK
+                : ExitStatus.IN_PROGRESS;
+      }
     }
 
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9ef03a4c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
index 899b5c0..d3d814c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
@@ -404,7 +404,7 @@ public class TestMover {
       int rc = ToolRunner.run(conf, new Mover.Cli(),
           new String[] {"-p", file.toString()});
       Assert.assertEquals("Movement should fail after some retry",
-          ExitStatus.IO_EXCEPTION.getExitCode(), rc);
+          ExitStatus.NO_MOVE_PROGRESS.getExitCode(), rc);
     } finally {
       cluster.shutdown();
     }