You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xg...@apache.org on 2017/06/05 21:04:18 UTC
[09/50] [abbrv] hadoop git commit: HDFS-11446.
TestMaintenanceState#testWithNNAndDNRestart fails intermittently. Contributed
by Yiqun Lin.
HDFS-11446. TestMaintenanceState#testWithNNAndDNRestart fails intermittently. Contributed by Yiqun Lin.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/31058b24
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/31058b24
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/31058b24
Branch: refs/heads/YARN-5734
Commit: 31058b243e9575d90f038bb2fdf5a556710f6f7f
Parents: 89bb8bf
Author: Yiqun Lin <yq...@apache.org>
Authored: Sun May 28 11:23:32 2017 +0800
Committer: Yiqun Lin <yq...@apache.org>
Committed: Sun May 28 11:23:32 2017 +0800
----------------------------------------------------------------------
.../hadoop/hdfs/TestMaintenanceState.java | 128 ++++++++++---------
1 file changed, 66 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/31058b24/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
index a37bdb8..b49fba0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -30,12 +29,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
-import org.junit.Assert;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
@@ -48,8 +42,16 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Time;
+import org.junit.Assert;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Supplier;
+import com.google.common.collect.Lists;
/**
* This class tests node maintenance.
@@ -125,8 +127,8 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
// When node is in ENTERING_MAINTENANCE state, it can still serve read
// requests
- assertNull(checkWithRetry(ns, fileSys, file, replicas, null,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas, null,
+ nodeOutofService);
putNodeInService(0, nodeOutofService.getDatanodeUuid());
@@ -387,8 +389,8 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
// The block should be replicated to another datanode to meet
// expected replication count.
- assertNull(checkWithRetry(ns, fileSys, file, expectedReplicasInRead,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, expectedReplicasInRead,
+ nodeOutofService);
cleanupFile(fileSys, file);
teardown();
@@ -548,19 +550,19 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
client.datanodeReport(DatanodeReportType.LIVE).length);
// test 1, verify the replica in IN_MAINTENANCE state isn't in LocatedBlock
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService);
takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), 0, null,
AdminStates.DECOMMISSIONED);
// test 2 after decommission has completed, the replication count is
// replicas + 1 which includes the decommissioned node.
- assertNull(checkWithRetry(ns, fileSys, file, replicas + 1, null));
+ checkWithRetry(ns, fileSys, file, replicas + 1, null);
// test 3, put the node in service, replication count should restore.
putNodeInService(0, nodeOutofService.getDatanodeUuid());
- assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+ checkWithRetry(ns, fileSys, file, replicas, null);
cleanupFile(fileSys, file);
}
@@ -587,8 +589,8 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), Long.MAX_VALUE,
null, AdminStates.IN_MAINTENANCE);
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService);
cleanupFile(fileSys, file);
}
@@ -631,10 +633,10 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
takeNodeOutofService(0, decommissionDNUuid, 0, null, maintenanceNodes,
AdminStates.DECOMMISSIONED);
// Out of the replicas returned, one is the decommissioned node.
- assertNull(checkWithRetry(ns, fileSys, file, repl, maintenanceDN));
+ checkWithRetry(ns, fileSys, file, repl, maintenanceDN);
putNodeInService(0, maintenanceDN);
- assertNull(checkWithRetry(ns, fileSys, file, repl + 1, null));
+ checkWithRetry(ns, fileSys, file, repl + 1, null);
cleanupFile(fileSys, file);
teardown();
@@ -663,7 +665,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
AdminStates.IN_MAINTENANCE);
// Verify file replication matches maintenance state min replication
- assertNull(checkWithRetry(ns, fileSys, file, 1, null, nodes[0]));
+ checkWithRetry(ns, fileSys, file, 1, null, nodes[0]);
// Put the maintenance nodes back in service
for (DatanodeInfo datanodeInfo : maintenanceDN) {
@@ -671,7 +673,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
}
// Verify file replication catching up to the old state
- assertNull(checkWithRetry(ns, fileSys, file, repl, null));
+ checkWithRetry(ns, fileSys, file, repl, null);
cleanupFile(fileSys, file);
}
@@ -720,19 +722,19 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
// Verify that the nodeOutofService remains in blocksMap and
// # of live replicas For read operation is expected.
- assertNull(checkWithRetry(ns, fileSys, file, oldFactor - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, oldFactor - 1,
+ nodeOutofService);
final DFSClient client = getDfsClient(0);
client.setReplication(file.toString(), (short)newFactor);
// Verify that the nodeOutofService remains in blocksMap and
// # of live replicas for read operation.
- assertNull(checkWithRetry(ns, fileSys, file, expectedLiveReplicas,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, expectedLiveReplicas,
+ nodeOutofService);
putNodeInService(0, nodeOutofService.getDatanodeUuid());
- assertNull(checkWithRetry(ns, fileSys, file, newFactor, null));
+ checkWithRetry(ns, fileSys, file, newFactor, null);
cleanupFile(fileSys, file);
teardown();
@@ -765,8 +767,8 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
AdminStates.IN_MAINTENANCE);
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService);
final DFSClient client = getDfsClient(0);
assertEquals("All datanodes must be alive", numDatanodes,
@@ -779,16 +781,16 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
client.datanodeReport(DatanodeReportType.LIVE).length);
// Dead maintenance node's blocks should remain in block map.
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService);
// When dead maintenance mode is transitioned to out of maintenance mode,
// its blocks should be removed from block map.
// This will then trigger replication to restore the live replicas back
// to replication factor.
putNodeInService(0, nodeOutofService.getDatanodeUuid());
- assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService,
- null));
+ checkWithRetry(ns, fileSys, file, replicas, nodeOutofService,
+ null);
cleanupFile(fileSys, file);
}
@@ -821,8 +823,8 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
AdminStates.IN_MAINTENANCE);
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService);
DFSClient client = getDfsClient(0);
assertEquals("All datanodes must be alive", numDatanodes,
@@ -836,23 +838,23 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
client.datanodeReport(DatanodeReportType.LIVE).length);
// Dead maintenance node's blocks should remain in block map.
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService);
// restart nn, nn will restore 3 live replicas given it doesn't
// know the maintenance node has the replica.
getCluster().restartNameNode(0);
ns = getCluster().getNamesystem(0);
- assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+ checkWithRetry(ns, fileSys, file, replicas, null);
// restart dn, nn has 1 maintenance replica and 3 live replicas.
getCluster().restartDataNode(dnProp, true);
getCluster().waitActive();
- assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService));
+ checkWithRetry(ns, fileSys, file, replicas, nodeOutofService);
// Put the node in service, a redundant replica should be removed.
putNodeInService(0, nodeOutofService.getDatanodeUuid());
- assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+ checkWithRetry(ns, fileSys, file, replicas, null);
cleanupFile(fileSys, file);
}
@@ -878,12 +880,12 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
writeFile(fileSys, file, replicas, 2);
// Verify nodeOutofService wasn't chosen for write operation.
- assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
- nodeOutofService, null));
+ checkWithRetry(ns, fileSys, file, replicas - 1,
+ nodeOutofService, null);
// Put the node back to service, live replicas should be restored.
putNodeInService(0, nodeOutofService.getDatanodeUuid());
- assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+ checkWithRetry(ns, fileSys, file, replicas, null);
cleanupFile(fileSys, file);
}
@@ -934,12 +936,12 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
client.setReplication(file.toString(), (short) 1);
// Verify the nodeOutofService remains in blocksMap.
- assertNull(checkWithRetry(ns, fileSys, file, 1, nodeOutofService));
+ checkWithRetry(ns, fileSys, file, 1, nodeOutofService);
// Restart NN and verify the nodeOutofService remains in blocksMap.
getCluster().restartNameNode(0);
ns = getCluster().getNamesystem(0);
- assertNull(checkWithRetry(ns, fileSys, file, 1, nodeOutofService));
+ checkWithRetry(ns, fileSys, file, 1, nodeOutofService);
cleanupFile(fileSys, file);
}
@@ -1081,30 +1083,32 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
return null;
}
- static String checkWithRetry(FSNamesystem ns, FileSystem fileSys,
- Path name, int repl, DatanodeInfo inMaintenanceNode)
- throws IOException {
- return checkWithRetry(ns, fileSys, name, repl, inMaintenanceNode,
+ static void checkWithRetry(FSNamesystem ns, FileSystem fileSys, Path name,
+ int repl, DatanodeInfo inMaintenanceNode) {
+ checkWithRetry(ns, fileSys, name, repl, inMaintenanceNode,
inMaintenanceNode);
}
- static String checkWithRetry(FSNamesystem ns, FileSystem fileSys,
- Path name, int repl, DatanodeInfo excludedNode,
- DatanodeInfo underMaintenanceNode) throws IOException {
- int tries = 0;
- String output = null;
- while (tries++ < 200) {
- try {
- Thread.sleep(100);
- output = checkFile(ns, fileSys, name, repl, excludedNode,
- underMaintenanceNode);
- if (output == null) {
- break;
+ static void checkWithRetry(final FSNamesystem ns, final FileSystem fileSys,
+ final Path name, final int repl, final DatanodeInfo excludedNode,
+ final DatanodeInfo underMaintenanceNode) {
+ try {
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+
+ @Override
+ public Boolean get() {
+ String output = null;
+ try {
+ output = checkFile(ns, fileSys, name, repl, excludedNode,
+ underMaintenanceNode);
+ } catch (Exception ignored) {
+ }
+
+ return (output == null);
}
- } catch (InterruptedException ie) {
- }
+ }, 100, 60000);
+ } catch (Exception ignored) {
}
- return output;
}
static private DatanodeInfo[] getFirstBlockReplicasDatanodeInfos(
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org