Posted to common-commits@hadoop.apache.org by jb...@apache.org on 2020/12/02 20:53:33 UTC

[hadoop] branch branch-3.2 updated: HDFS-15695. NN should not let the balancer run in safemode (#2489). Contributed by Daryn Sharp and Ahmed Hussein

This is an automated email from the ASF dual-hosted git repository.

jbrennan pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 6cc0824  HDFS-15695. NN should not let the balancer run in safemode (#2489). Contributed by Daryn Sharp and Ahmed Hussein
6cc0824 is described below

commit 6cc08240722d181972df35f4b551fe422ed22f18
Author: Ahmed Hussein <50...@users.noreply.github.com>
AuthorDate: Wed Dec 2 13:59:00 2020 -0600

    HDFS-15695. NN should not let the balancer run in safemode (#2489). Contributed by Daryn Sharp and Ahmed Hussein
    
    (cherry picked from commit 2b5b556dd7f17f095548b4a4485e7b7218af74be)
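    
    For context, the change adds a safe-mode check to NameNodeRpcServer#getBlocks,
    the RPC the balancer uses to enumerate blocks on a datanode. A minimal
    client-side sketch of the resulting behavior follows; it reuses the proxy
    setup shown in the test diff below, while the probe class, its name, and the
    back-off handling are illustrative assumptions rather than part of this patch.
    
    import java.net.InetSocketAddress;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSUtilClient;
    import org.apache.hadoop.hdfs.NameNodeProxies;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
    import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
    import org.apache.hadoop.ipc.RemoteException;
    
    /** Illustrative only: observes the safe-mode rejection added by HDFS-15695. */
    public class GetBlocksSafeModeProbe {
      /**
       * Returns true if the NameNode served the block list, false if it
       * refused the request because it is in safe mode.
       */
      public static boolean tryGetBlocks(Configuration conf,
          InetSocketAddress nnAddr, DatanodeInfo datanode) throws Exception {
        NamenodeProtocol namenode = NameNodeProxies.createProxy(conf,
            DFSUtilClient.getNNUri(nnAddr), NamenodeProtocol.class).getProxy();
        try {
          // Ask for blocks stored on the given datanode, as the balancer does.
          namenode.getBlocks(datanode, Long.MAX_VALUE, 1);
          return true;
        } catch (RemoteException re) {
          // After this change the NameNode answers with
          // "Cannot execute getBlocks. Name node is in safe mode."
          if (re.getMessage() != null
              && re.getMessage().contains("Name node is in safe mode")) {
            return false; // back off instead of balancing a read-only namespace
          }
          throw re;
        }
      }
    }
    
    The test changes below exercise exactly this path: they enter safe mode via
    DistributedFileSystem#setSafeMode and intercept the resulting RemoteException.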
---
 .../hdfs/server/namenode/NameNodeRpcServer.java    |   1 +
 .../java/org/apache/hadoop/hdfs/TestGetBlocks.java | 208 +++++++++++----------
 2 files changed, 109 insertions(+), 100 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index 3a8cc40..aea48ce 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -660,6 +660,7 @@ public class NameNodeRpcServer implements NamenodeProtocols {
     }
     checkNNStartup();
     namesystem.checkSuperuserPrivilege();
+    namesystem.checkNameNodeSafeMode("Cannot execute getBlocks");
     return namesystem.getBlocks(datanode, size, minBlockSize);
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
index e3a1763..e82b990 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java
@@ -19,8 +19,8 @@ package org.apache.hadoop.hdfs;
 
 import static org.junit.Assert.*;
 
-import java.io.IOException;
 import java.net.InetSocketAddress;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -28,37 +28,50 @@ import java.util.Map;
 import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.TestBlockManager;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.test.LambdaTestUtils;
+
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This class tests if getblocks request works correctly.
  */
 public class TestGetBlocks {
-  private static final int blockSize = 8192;
-  private static final String racks[] = new String[] { "/d1/r1", "/d1/r1",
-      "/d1/r2", "/d1/r2", "/d1/r2", "/d2/r3", "/d2/r3" };
-  private static final int numDatanodes = racks.length;
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestBlockManager.class);
+
+  private static final int BLOCK_SIZE = 8192;
+  private static final String[] RACKS = new String[]{"/d1/r1", "/d1/r1",
+      "/d1/r2", "/d1/r2", "/d1/r2", "/d2/r3", "/d2/r3"};
+  private static final int NUM_DATA_NODES = RACKS.length;
 
   /**
    * Stop the heartbeat of a datanode in the MiniDFSCluster
@@ -96,7 +109,7 @@ public class TestGetBlocks {
     conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
         staleInterval);
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-        .numDataNodes(numDatanodes).racks(racks).build();
+        .numDataNodes(NUM_DATA_NODES).racks(RACKS).build();
 
     cluster.waitActive();
     InetSocketAddress addr = new InetSocketAddress("localhost",
@@ -105,7 +118,7 @@ public class TestGetBlocks {
     List<DatanodeDescriptor> nodeInfoList = cluster.getNameNode()
         .getNamesystem().getBlockManager().getDatanodeManager()
         .getDatanodeListForReport(DatanodeReportType.LIVE);
-    assertEquals("Unexpected number of datanodes", numDatanodes,
+    assertEquals("Unexpected number of datanodes", NUM_DATA_NODES,
         nodeInfoList.size());
     FileSystem fileSys = cluster.getFileSystem();
     FSDataOutputStream stm = null;
@@ -116,14 +129,14 @@ public class TestGetBlocks {
       stm = fileSys.create(fileName, true,
           fileSys.getConf().getInt(
               CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
-          (short) 3, blockSize);
-      stm.write(new byte[(blockSize * 3) / 2]);
+          (short) 3, BLOCK_SIZE);
+      stm.write(new byte[(BLOCK_SIZE * 3) / 2]);
       // We do not close the stream so that
       // the writing seems to be still ongoing
       stm.hflush();
 
       LocatedBlocks blocks = client.getNamenode().getBlockLocations(
-          fileName.toString(), 0, blockSize);
+          fileName.toString(), 0, BLOCK_SIZE);
       DatanodeInfo[] nodes = blocks.get(0).getLocations();
       assertEquals(nodes.length, 3);
       DataNode staleNode = null;
@@ -139,7 +152,7 @@ public class TestGetBlocks {
           -(staleInterval + 1));
 
       LocatedBlocks blocksAfterStale = client.getNamenode().getBlockLocations(
-          fileName.toString(), 0, blockSize);
+          fileName.toString(), 0, BLOCK_SIZE);
       DatanodeInfo[] nodesAfterStale = blocksAfterStale.get(0).getLocations();
       assertEquals(nodesAfterStale.length, 3);
       assertEquals(nodesAfterStale[2].getHostName(), nodes[0].getHostName());
@@ -175,133 +188,128 @@ public class TestGetBlocks {
     }
   }
 
-  /** test getBlocks */
+  /**
+   * Test getBlocks.
+   */
   @Test
   public void testGetBlocks() throws Exception {
-    final Configuration CONF = new HdfsConfiguration();
-
-    final short REPLICATION_FACTOR = (short) 2;
-    final int DEFAULT_BLOCK_SIZE = 1024;
+    DistributedFileSystem fs = null;
+    Path testFile = null;
+    BlockWithLocations[] locs;
+    final int blkSize = 1024;
+    final String filePath = "/tmp.txt";
+    final int blkLocsSize = 13;
+    long fileLen = 12 * blkSize + 1;
+    final short replicationFactor = (short) 2;
+    final Configuration config = new HdfsConfiguration();
+
+    // set configurations
+    config.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blkSize);
+    config.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
+        blkSize);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(config)
+        .numDataNodes(replicationFactor)
+        .storagesPerDatanode(4)
+        .build();
 
-    CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
-    CONF.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
-      DEFAULT_BLOCK_SIZE);
-
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF)
-              .numDataNodes(REPLICATION_FACTOR)
-              .storagesPerDatanode(4)
-              .build();
     try {
       cluster.waitActive();
       // the third block will not be visible to getBlocks
-      long fileLen = 12 * DEFAULT_BLOCK_SIZE + 1;
-      DFSTestUtil.createFile(cluster.getFileSystem(), new Path("/tmp.txt"),
-          fileLen, REPLICATION_FACTOR, 0L);
+      testFile = new Path(filePath);
+      DFSTestUtil.createFile(cluster.getFileSystem(), testFile,
+          fileLen, replicationFactor, 0L);
 
       // get blocks & data nodes
-      List<LocatedBlock> locatedBlocks;
-      DatanodeInfo[] dataNodes = null;
-      boolean notWritten;
-      final DFSClient dfsclient = new DFSClient(
-          DFSUtilClient.getNNAddress(CONF), CONF);
-      do {
-        locatedBlocks = dfsclient.getNamenode()
-            .getBlockLocations("/tmp.txt", 0, fileLen).getLocatedBlocks();
-        assertEquals(13, locatedBlocks.size());
-        notWritten = false;
-        for (int i = 0; i < 2; i++) {
-          dataNodes = locatedBlocks.get(i).getLocations();
-          if (dataNodes.length != REPLICATION_FACTOR) {
-            notWritten = true;
-            try {
-              Thread.sleep(10);
-            } catch (InterruptedException e) {
-            }
-            break;
-          }
-        }
-      } while (notWritten);
-      dfsclient.close();
-
+      fs = cluster.getFileSystem();
+      DFSTestUtil.waitForReplication(fs, testFile, replicationFactor, 60000);
+      RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(testFile);
+      LocatedFileStatus stat = it.next();
+      BlockLocation[] blockLocations = stat.getBlockLocations();
+      assertEquals(blkLocsSize, blockLocations.length);
+      HdfsDataInputStream dis = (HdfsDataInputStream) fs.open(testFile);
+      Collection<LocatedBlock> dinfo = dis.getAllBlocks();
+      dis.close();
+      DatanodeInfo[] dataNodes = dinfo.iterator().next().getLocations();
       // get RPC client to namenode
       InetSocketAddress addr = new InetSocketAddress("localhost",
           cluster.getNameNodePort());
-      NamenodeProtocol namenode = NameNodeProxies.createProxy(CONF,
+      NamenodeProtocol namenode = NameNodeProxies.createProxy(config,
           DFSUtilClient.getNNUri(addr), NamenodeProtocol.class).getProxy();
 
-      // get blocks of size fileLen from dataNodes[0], with minBlockSize as
-      // fileLen
-      BlockWithLocations[] locs;
-
       // Should return all 13 blocks, as minBlockSize is not passed
-      locs = namenode.getBlocks(dataNodes[0], fileLen, 0)
-          .getBlocks();
-      assertEquals(13, locs.length);
-      assertEquals(locs[0].getStorageIDs().length, 2);
-      assertEquals(locs[1].getStorageIDs().length, 2);
-
-      // Should return 12 blocks, as minBlockSize is DEFAULT_BLOCK_SIZE
-      locs = namenode.getBlocks(dataNodes[0], fileLen, DEFAULT_BLOCK_SIZE)
-          .getBlocks();
-      assertEquals(12, locs.length);
-      assertEquals(locs[0].getStorageIDs().length, 2);
-      assertEquals(locs[1].getStorageIDs().length, 2);
+      locs = namenode.getBlocks(dataNodes[0], fileLen, 0).getBlocks();
+      assertEquals(blkLocsSize, locs.length);
+
+      assertEquals(locs[0].getStorageIDs().length, replicationFactor);
+      assertEquals(locs[1].getStorageIDs().length, replicationFactor);
+
+      // Should return 12 blocks, as minBlockSize is blkSize
+      locs = namenode.getBlocks(dataNodes[0], fileLen, blkSize).getBlocks();
+      assertEquals(blkLocsSize - 1, locs.length);
+      assertEquals(locs[0].getStorageIDs().length, replicationFactor);
+      assertEquals(locs[1].getStorageIDs().length, replicationFactor);
 
       // get blocks of size BlockSize from dataNodes[0]
-      locs = namenode.getBlocks(dataNodes[0], DEFAULT_BLOCK_SIZE,
-          DEFAULT_BLOCK_SIZE).getBlocks();
+      locs = namenode.getBlocks(dataNodes[0], blkSize,
+          blkSize).getBlocks();
       assertEquals(locs.length, 1);
-      assertEquals(locs[0].getStorageIDs().length, 2);
+      assertEquals(locs[0].getStorageIDs().length, replicationFactor);
 
       // get blocks of size 1 from dataNodes[0]
       locs = namenode.getBlocks(dataNodes[0], 1, 1).getBlocks();
       assertEquals(locs.length, 1);
-      assertEquals(locs[0].getStorageIDs().length, 2);
+      assertEquals(locs[0].getStorageIDs().length, replicationFactor);
 
       // get blocks of size 0 from dataNodes[0]
-      getBlocksWithException(namenode, dataNodes[0], 0, 0);
+      getBlocksWithException(namenode, dataNodes[0], 0, 0,
+          RemoteException.class, "IllegalArgumentException");
 
       // get blocks of size -1 from dataNodes[0]
-      getBlocksWithException(namenode, dataNodes[0], -1, 0);
+      getBlocksWithException(namenode, dataNodes[0], -1, 0,
+          RemoteException.class, "IllegalArgumentException");
 
       // minBlockSize is -1
-      getBlocksWithException(namenode, dataNodes[0], DEFAULT_BLOCK_SIZE, -1);
+      getBlocksWithException(namenode, dataNodes[0], blkSize, -1,
+          RemoteException.class, "IllegalArgumentException");
 
       // get blocks of size BlockSize from a non-existent datanode
       DatanodeInfo info = DFSTestUtil.getDatanodeInfo("1.2.3.4");
-      getBlocksWithIncorrectDatanodeException(namenode, info, 2, 0);
-
+      getBlocksWithException(namenode, info, replicationFactor, 0,
+          RemoteException.class, "HadoopIllegalArgumentException");
 
       testBlockIterator(cluster);
-    } finally {
+
+      // Namenode should refuse to provide block locations to the balancer
+      // while in safemode.
+      locs = namenode.getBlocks(dataNodes[0], fileLen, 0).getBlocks();
+      assertEquals(blkLocsSize, locs.length);
+      assertFalse(fs.isInSafeMode());
+      LOG.info("Entering safe mode");
+      fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      LOG.info("Entered safe mode");
+      assertTrue(fs.isInSafeMode());
+      getBlocksWithException(namenode, info, replicationFactor, 0,
+          RemoteException.class,
+          "Cannot execute getBlocks. Name node is in safe mode.");
+      fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+      assertFalse(fs.isInSafeMode());
+    }  finally {
+      if (fs != null) {
+        fs.delete(testFile, true);
+        fs.close();
+      }
       cluster.shutdown();
     }
   }
 
   private void getBlocksWithException(NamenodeProtocol namenode,
-      DatanodeInfo datanode, long size, long minBlockSize) throws IOException {
-    boolean getException = false;
-    try {
-      namenode.getBlocks(datanode, size, minBlockSize);
-    } catch (RemoteException e) {
-      getException = true;
-      assertTrue(e.getClassName().contains("IllegalArgumentException"));
-    }
-    assertTrue(getException);
-  }
+      DatanodeInfo datanode, long size, long minBlkSize, Class exClass,
+      String msg) throws Exception {
 
-  private void getBlocksWithIncorrectDatanodeException(
-      NamenodeProtocol namenode, DatanodeInfo datanode, long size,
-      long minBlockSize)
-      throws IOException {
-    boolean getException = false;
-    try {
-      namenode.getBlocks(datanode, size, minBlockSize);
-    } catch (RemoteException e) {
-      getException = true;
-      assertTrue(e.getClassName().contains("HadoopIllegalArgumentException"));
-    }
-    assertTrue(getException);
+    // The getBlocks call should be rejected with the expected exception.
+    LambdaTestUtils.intercept(exClass,
+        msg, () -> namenode.getBlocks(datanode, size, minBlkSize));
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org