You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by in...@apache.org on 2019/06/06 17:20:50 UTC

[hadoop] branch trunk updated: HDFS-14527. Stop all DataNodes may result in NN terminate. Contributed by He Xiaoqiao.

This is an automated email from the ASF dual-hosted git repository.

inigoiri pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 944adc6  HDFS-14527. Stop all DataNodes may result in NN terminate. Contributed by He Xiaoqiao.
944adc6 is described below

commit 944adc61b1830388d520d4052fc7eb6c7ba2790d
Author: Inigo Goiri <in...@apache.org>
AuthorDate: Thu Jun 6 10:20:28 2019 -0700

    HDFS-14527. Stop all DataNodes may result in NN terminate. Contributed by He Xiaoqiao.
---
 .../BlockPlacementPolicyDefault.java               |   4 +-
 .../BlockPlacementPolicyRackFaultTolerant.java     |   4 +-
 .../blockmanagement/TestRedundancyMonitor.java     | 108 +++++++++++++++++++++
 3 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index 6fed8a1..1320c80 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -348,7 +348,9 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     }
     // No calculation needed when there is only one rack or picking one node.
     int numOfRacks = clusterMap.getNumOfRacks();
-    if (numOfRacks == 1 || totalNumOfReplicas <= 1) {
+    // HDFS-14527 return default when numOfRacks = 0 to avoid
+    // ArithmeticException when calc maxNodesPerRack at following logic.
+    if (numOfRacks <= 1 || totalNumOfReplicas <= 1) {
       return new int[] {numOfReplicas, totalNumOfReplicas};
     }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java
index 95c5c88..b204450 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java
@@ -43,7 +43,9 @@ public class BlockPlacementPolicyRackFaultTolerant extends BlockPlacementPolicyD
     }
     // No calculation needed when there is only one rack or picking one node.
     int numOfRacks = clusterMap.getNumOfRacks();
-    if (numOfRacks == 1 || totalNumOfReplicas <= 1) {
+    // HDFS-14527 return default when numOfRacks = 0 to avoid
+    // ArithmeticException when calc maxNodesPerRack at following logic.
+    if (numOfRacks <= 1 || totalNumOfReplicas <= 1) {
       return new int[] {numOfReplicas, totalNumOfReplicas};
     }
     // If more racks than replicas, put one replica per rack.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java
new file mode 100644
index 0000000..0667e26
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.blockmanagement;
+
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.TestBlockStoragePolicy;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.net.NetworkTopology;
+import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import static org.apache.hadoop.fs.contract.hdfs.HDFSContract.BLOCK_SIZE;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.spy;
+
+/**
+ * This class tests RedundancyMonitor in BlockManager.
+ */
+public class TestRedundancyMonitor {
+  private static final String FILENAME = "/dummyfile.txt";
+
+  /**
+   * RedundancyMonitor invoke choose target out of global lock when
+   * #computeDatanodeWork. However it may result in NN terminate when choose
+   * target meet runtime exception(ArithmeticException) since we stop all
+   * DataNodes during that time.
+   * Verify that NN should not terminate even stop all datanodes.
+   */
+  @Test
+  public void testChooseTargetWhenAllDataNodesStop() throws Throwable {
+
+    HdfsConfiguration conf = new HdfsConfiguration();
+    String[] hosts = new String[]{"host1", "host2"};
+    String[] racks = new String[]{"/d1/r1", "/d1/r1"};
+    try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(conf)
+        .racks(racks).hosts(hosts).numDataNodes(hosts.length).build()) {
+      miniCluster.waitActive();
+
+      FSNamesystem fsn = miniCluster.getNamesystem();
+      BlockManager blockManager = fsn.getBlockManager();
+
+      BlockPlacementPolicyDefault replicator
+          = (BlockPlacementPolicyDefault) blockManager
+              .getBlockPlacementPolicy();
+      Set<DatanodeDescriptor> dns = blockManager.getDatanodeManager()
+          .getDatanodes();
+
+      DelayAnswer delayer = new DelayAnswer(BlockPlacementPolicyDefault.LOG);
+      NetworkTopology clusterMap = replicator.clusterMap;
+      NetworkTopology spyClusterMap = spy(clusterMap);
+      replicator.clusterMap = spyClusterMap;
+      doAnswer(delayer).when(spyClusterMap).getNumOfRacks();
+
+      ExecutorService pool = Executors.newFixedThreadPool(2);
+
+      // Trigger chooseTarget
+      Future<Void> chooseTargetFuture = pool.submit(() -> {
+        replicator.chooseTarget(FILENAME, 2, dns.iterator().next(),
+            new ArrayList<DatanodeStorageInfo>(), false, null, BLOCK_SIZE,
+            TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY, null);
+        return null;
+      });
+
+      // Wait until chooseTarget calls NetworkTopology#getNumOfRacks
+      delayer.waitForCall();
+      // Remove all DataNodes
+      Future<Void> stopDatanodesFuture = pool.submit(() -> {
+        for (DatanodeDescriptor dn : dns) {
+          spyClusterMap.remove(dn);
+        }
+        return null;
+      });
+      // Wait stopDatanodesFuture run finish
+      stopDatanodesFuture.get();
+
+      // Allow chooseTarget to proceed
+      delayer.proceed();
+      try {
+        chooseTargetFuture.get();
+      } catch (ExecutionException ee) {
+        throw ee.getCause();
+      }
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org