You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by na...@apache.org on 2019/06/03 19:29:57 UTC

[hadoop] branch trunk updated: HDDS-1558. IllegalArgumentException while processing container Reports.

This is an automated email from the ASF dual-hosted git repository.

nanda pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new f327112  HDDS-1558. IllegalArgumentException while processing container Reports.
f327112 is described below

commit f3271126fc9a3ad178b7dadd8edf851e16cf76d0
Author: Shashikant Banerjee <sh...@apache.org>
AuthorDate: Tue Jun 4 00:59:02 2019 +0530

    HDDS-1558. IllegalArgumentException while processing container Reports.
    
    Signed-off-by: Nanda kumar <na...@apache.org>
---
 .../container/common/impl/HddsDispatcher.java      | 15 +++-
 .../ozone/container/common/interfaces/Handler.java |  9 +++
 .../container/keyvalue/KeyValueContainer.java      |  6 +-
 .../ozone/container/keyvalue/KeyValueHandler.java  | 14 ++++
 .../rpc/TestContainerStateMachineFailures.java     | 85 ++++++++++++++++++++++
 5 files changed, 125 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
index 4e8d5b9..6f56b3c 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
@@ -67,6 +67,7 @@ import io.opentracing.Scope;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
@@ -299,8 +300,18 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor {
         State containerState = container.getContainerData().getState();
         Preconditions.checkState(
             containerState == State.OPEN || containerState == State.CLOSING);
-        container.getContainerData()
-            .setState(ContainerDataProto.State.UNHEALTHY);
+        // mark and persist the container state to be unhealthy
+        try {
+          handler.markContainerUhealthy(container);
+        } catch (IOException ioe) {
+          // just log the error here in case marking the container fails,
+          // Return the actual failure response to the client
+          LOG.error("Failed to mark container " + containerID + " UNHEALTHY. ",
+              ioe);
+        }
+        // in any case, the in memory state of the container should be unhealthy
+        Preconditions.checkArgument(
+            container.getContainerData().getState() == State.UNHEALTHY);
         sendCloseContainerActionIfNeeded(container);
       }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
index a3bb34b..52d14db 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
@@ -130,6 +130,15 @@ public abstract class Handler {
       throws IOException;
 
   /**
+   * Marks the container Unhealthy. Moves the container to UHEALTHY state.
+   *
+   * @param container container to update
+   * @throws IOException in case of exception
+   */
+  public abstract void markContainerUhealthy(Container container)
+      throws IOException;
+
+  /**
    * Moves the Container to QUASI_CLOSED state.
    *
    * @param container container to be quasi closed
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
index 38257c3..6a1ca86 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
@@ -339,8 +339,10 @@ public class KeyValueContainer implements Container<KeyValueContainerData> {
       updateContainerFile(containerFile);
 
     } catch (StorageContainerException ex) {
-      if (oldState != null) {
-        // Failed to update .container file. Reset the state to CLOSING
+      if (oldState != null
+          && containerData.getState() != ContainerDataProto.State.UNHEALTHY) {
+        // Failed to update .container file. Reset the state to old state only
+        // if the current state is not unhealthy.
         containerData.setState(oldState);
       }
       throw ex;
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 531fb02..72f48fa 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -893,6 +893,14 @@ public class KeyValueHandler extends Handler {
   }
 
   @Override
+  public void markContainerUhealthy(Container container)
+      throws IOException {
+    // this will mark the container unhealthy and a close container action will
+    // be sent from the dispatcher ton SCM to close down this container.
+    container.markContainerUnhealthy();
+  }
+
+  @Override
   public void quasiCloseContainer(Container container)
       throws IOException {
     final State state = container.getContainerState();
@@ -920,6 +928,12 @@ public class KeyValueHandler extends Handler {
     if (state == State.CLOSED) {
       return;
     }
+    if (state == State.UNHEALTHY) {
+      throw new StorageContainerException(
+          "Cannot close container #" + container.getContainerData()
+              .getContainerID() + " while in " + state + " state.",
+          ContainerProtos.Result.CONTAINER_UNHEALTHY);
+    }
     // The container has to be either in CLOSING or in QUASI_CLOSED state.
     if (state != State.CLOSING && state != State.QUASI_CLOSED) {
       ContainerProtos.Result error = state == State.INVALID ?
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java
index 5739d48..744f687 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestContainerStateMachineFailures.java
@@ -24,12 +24,16 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.client.ObjectStore;
 import org.apache.hadoop.ozone.client.OzoneClient;
 import org.apache.hadoop.ozone.client.OzoneClientFactory;
 import org.apache.hadoop.ozone.client.io.KeyOutputStream;
 import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
+import org.apache.hadoop.ozone.container.common.impl.ContainerData;
+import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
 import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
@@ -49,10 +53,13 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.
     HDDS_COMMAND_STATUS_REPORT_INTERVAL;
 import static org.apache.hadoop.hdds.HddsConfigKeys.
     HDDS_CONTAINER_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.
     HDDS_SCM_WATCHER_TIMEOUT;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.
     OZONE_SCM_STALENODE_INTERVAL;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
 
 /**
  * Tests the containerStateMachine failure handling.
@@ -185,4 +192,82 @@ public class TestContainerStateMachineFailures {
     Assert.assertEquals(ContainerProtos.Result.CONTAINER_MISSING,
         dispatcher.dispatch(request.build(), null).getResult());
   }
+
+  @Test
+  public void testUnhealthyContainer() throws Exception {
+    OzoneOutputStream key =
+        objectStore.getVolume(volumeName).getBucket(bucketName)
+            .createKey("ratis", 1024, ReplicationType.RATIS,
+                ReplicationFactor.ONE, new HashMap<>());
+    // First write and flush creates a container in the datanode
+    key.write("ratis".getBytes());
+    key.flush();
+    key.write("ratis".getBytes());
+
+    //get the name of a valid container
+    OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName).
+        setBucketName(bucketName).setType(HddsProtos.ReplicationType.RATIS)
+        .setFactor(HddsProtos.ReplicationFactor.ONE).setKeyName("ratis")
+        .build();
+    KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
+    List<OmKeyLocationInfo> locationInfoList =
+        groupOutputStream.getLocationInfoList();
+    Assert.assertEquals(1, locationInfoList.size());
+    OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
+    ContainerData containerData =
+        cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
+            .getContainer().getContainerSet()
+            .getContainer(omKeyLocationInfo.getContainerID())
+            .getContainerData();
+    Assert.assertTrue(containerData instanceof KeyValueContainerData);
+    KeyValueContainerData keyValueContainerData =
+        (KeyValueContainerData) containerData;
+    // delete the container db file
+    FileUtil.fullyDelete(new File(keyValueContainerData.getChunksPath()));
+    try {
+      key.close();
+      Assert.fail();
+    } catch (IOException ioe) {
+      Assert.assertTrue(ioe.getMessage().contains(
+          "Requested operation not allowed as ContainerState is UNHEALTHY"));
+    }
+    long containerID = omKeyLocationInfo.getContainerID();
+
+    // Make sure the container is marked unhealthy
+    Assert.assertTrue(
+        cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
+            .getContainer().getContainerSet().getContainer(containerID)
+            .getContainerState()
+            == ContainerProtos.ContainerDataProto.State.UNHEALTHY);
+    // Check metadata in the .container file
+    File containerFile = new File(keyValueContainerData.getMetadataPath(),
+        containerID + OzoneConsts.CONTAINER_EXTENSION);
+
+    keyValueContainerData = (KeyValueContainerData) ContainerDataYaml
+        .readContainerFile(containerFile);
+    assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
+
+    // restart the hdds datanode and see if the container is listed in the
+    // in the missing container set and not in the regular set
+    cluster.restartHddsDatanode(0, true);
+    // make sure the container state is still marked unhealthy after restart
+    keyValueContainerData = (KeyValueContainerData) ContainerDataYaml
+        .readContainerFile(containerFile);
+    assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
+
+    OzoneContainer ozoneContainer;
+    ozoneContainer = cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
+        .getContainer();
+    HddsDispatcher dispatcher = (HddsDispatcher) ozoneContainer.getDispatcher();
+    ContainerProtos.ContainerCommandRequestProto.Builder request =
+        ContainerProtos.ContainerCommandRequestProto.newBuilder();
+    request.setCmdType(ContainerProtos.Type.CloseContainer);
+    request.setContainerID(containerID);
+    request.setCloseContainer(
+        ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
+    request.setDatanodeUuid(
+        cluster.getHddsDatanodes().get(0).getDatanodeDetails().getUuidString());
+    Assert.assertEquals(ContainerProtos.Result.CONTAINER_UNHEALTHY,
+        dispatcher.dispatch(request.build(), null).getResult());
+  }
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org