You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zj...@apache.org on 2015/05/12 22:49:39 UTC

[22/36] hadoop git commit: HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah.

HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d7597a2d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d7597a2d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d7597a2d

Branch: refs/heads/YARN-2928
Commit: d7597a2d2138530b94c847236e582dbfd69d8078
Parents: ed685f1
Author: Kihwal Lee <ki...@apache.org>
Authored: Mon May 11 14:30:35 2015 -0500
Committer: Zhijie Shen <zj...@apache.org>
Committed: Tue May 12 13:24:14 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hdfs/server/datanode/ErrorReportAction.java |  4 ++
 .../server/datanode/ReportBadBlockAction.java   |  4 ++
 .../server/datanode/TestBPOfferService.java     | 54 ++++++++++++++++++++
 4 files changed, 65 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d7597a2d/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 8060644..b67caed 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -814,6 +814,9 @@ Release 2.7.1 - UNRELEASED
     HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block
     request is in edit log. (Rushabh S Shah via kihwal)
 
+    HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor
+    goes for infinite loop (Rushabh S Shah  via kihwal)
+
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d7597a2d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java
index 331822a..b7a9dae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 
 import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.ipc.RemoteException;
 
 
 /**
@@ -43,6 +44,9 @@ public class ErrorReportAction implements BPServiceActorAction {
     DatanodeRegistration bpRegistration) throws BPServiceActorActionException {
     try {
       bpNamenode.errorReport(bpRegistration, errorCode, errorMessage);
+    } catch (RemoteException re) {
+      DataNode.LOG.info("trySendErrorReport encountered RemoteException  "
+          + "errorMessage: " + errorMessage + "  errorCode: " + errorCode, re);
     } catch(IOException e) {
       throw new BPServiceActorActionException("Error reporting "
           + "an error to namenode: ");

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d7597a2d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java
index 7155eae..671a1fe 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.ipc.RemoteException;
 
 /**
  * ReportBadBlockAction is an instruction issued by {{BPOfferService}} to
@@ -59,6 +60,9 @@ public class ReportBadBlockAction implements BPServiceActorAction {
 
     try {
       bpNamenode.reportBadBlocks(locatedBlock);
+    } catch (RemoteException re) {
+      DataNode.LOG.info("reportBadBlock encountered RemoteException for "
+          + "block:  " + block , re);
     } catch (IOException e) {
       throw new BPServiceActorActionException("Failed to report bad block "
           + block + " to namenode: ");

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d7597a2d/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
index 3aa9a7b..64cc78b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java
@@ -55,6 +55,9 @@ import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
 import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.ipc.StandbyException;
+import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.PathUtils;
 import org.apache.hadoop.util.Time;
@@ -621,4 +624,55 @@ public class TestBPOfferService {
       bpos.stop();
     }
   } 
+
+  /**
+   * This test case doesn't add the reportBadBlock request to
+   * {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws
+   * {@link StandbyException}
+   * @throws Exception
+   */
+ @Test
+  public void testReportBadBlocksWhenNNThrowsStandbyException()
+      throws Exception {
+    BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2);
+    bpos.start();
+    try {
+      waitForInitialization(bpos);
+      // Should start with neither NN as active.
+      assertNull(bpos.getActiveNN());
+      // Have NN1 claim active at txid 1
+      mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
+      bpos.triggerHeartbeatForTests();
+      // Now mockNN1 is acting like active namenode and mockNN2 as Standby
+      assertSame(mockNN1, bpos.getActiveNN());
+      // Return nothing when active Active Namenode calls reportBadBlocks
+      Mockito.doNothing().when(mockNN1).reportBadBlocks
+          (Mockito.any(LocatedBlock[].class));
+
+      RemoteException re = new RemoteException(StandbyException.class.
+          getName(), "Operation category WRITE is not supported in state "
+          + "standby", RpcErrorCodeProto.ERROR_APPLICATION);
+      // Return StandbyException wrapped in RemoteException when Standby NN
+      // calls reportBadBlocks
+      Mockito.doThrow(re).when(mockNN2).reportBadBlocks
+          (Mockito.any(LocatedBlock[].class));
+
+      bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK)
+          .getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK)
+          .getStorageType());
+      // Send heartbeat so that the BpServiceActor can report bad block to
+      // namenode
+      bpos.triggerHeartbeatForTests();
+      Mockito.verify(mockNN2, Mockito.times(1))
+      .reportBadBlocks(Mockito.any(LocatedBlock[].class));
+
+      // Trigger another heartbeat, this will send reportBadBlock again if it
+      // is present in the queue.
+      bpos.triggerHeartbeatForTests();
+      Mockito.verify(mockNN2, Mockito.times(1))
+          .reportBadBlocks(Mockito.any(LocatedBlock[].class));
+    } finally {
+      bpos.stop();
+    }
+  }
 }