Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2017/06/01 23:01:49 UTC

hadoop git commit: HDFS-11673. [READ] Handle failures of Datanode with PROVIDED storage

Repository: hadoop
Updated Branches:
  refs/heads/HDFS-9806 fc467d6bc -> 0e579b4be


HDFS-11673. [READ] Handle failures of Datanode with PROVIDED storage


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0e579b4b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0e579b4b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0e579b4b

Branch: refs/heads/HDFS-9806
Commit: 0e579b4be293808d7c1a7ed674f639b39e052b66
Parents: fc467d6
Author: Virajith Jalaparti <vi...@apache.org>
Authored: Thu Jun 1 16:01:31 2017 -0700
Committer: Virajith Jalaparti <vi...@apache.org>
Committed: Thu Jun 1 16:01:31 2017 -0700

----------------------------------------------------------------------
 .../hdfs/server/blockmanagement/BlockInfo.java  | 12 +++-
 .../server/blockmanagement/BlockManager.java    |  5 +-
 .../server/blockmanagement/BlockProvider.java   | 18 +++--
 .../blockmanagement/ProvidedStorageMap.java     | 54 +++++++++++++--
 .../blockmanagement/TestProvidedStorageMap.java | 10 ++-
 .../TestNameNodeProvidedImplementation.java     | 72 +++++++++++++++++++-
 6 files changed, 150 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
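In brief: a PROVIDED storage is shared by every datanode that mounts it, so the
NameNode must not discard provided blocks when one such datanode fails; the
block provider should stop only when the last datanode serving PROVIDED storage
is gone. A minimal, self-contained sketch of that rule (hypothetical names, not
the actual HDFS classes touched in this patch):

import java.util.HashSet;
import java.util.Set;

class ProvidedFailureSketch {
  // UUIDs of datanodes currently serving the shared PROVIDED storage.
  private final Set<String> providedDns = new HashSet<>();

  void register(String dnUuid) {
    providedDns.add(dnUuid);
  }

  // Rough analogue of ProvidedStorageMap#removeDatanode introduced below.
  void removeDatanode(String dnUuid) {
    providedDns.remove(dnUuid);
    if (providedDns.isEmpty()) {
      stopProvider();  // last provided DN is gone; blocks become unavailable
    }
  }

  private void stopProvider() { /* analogue of BlockProvider#stop() */ }
}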


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0e579b4b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
index df9cdc3..3875fcc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
@@ -24,6 +24,7 @@ import java.util.NoSuchElementException;
 
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockType;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
@@ -189,8 +190,15 @@ public abstract class BlockInfo extends Block
     int len = getCapacity();
     for(int idx = 0; idx < len; idx++) {
       DatanodeStorageInfo cur = getStorageInfo(idx);
-      if(cur != null && cur.getDatanodeDescriptor() == dn) {
-        return cur;
+      if(cur != null) {
+        if (cur.getStorageType() == StorageType.PROVIDED) {
+          // if the block resides on provided storage, match only the storage IDs
+          if (dn.getStorageInfo(cur.getStorageID()) != null) {
+            return cur;
+          }
+        } else if (cur.getDatanodeDescriptor() == dn) {
+          return cur;
+        }
       }
     }
     return null;

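Why the matching change above is needed: every datanode that mounts the same
PROVIDED volume shares a single DatanodeStorageInfo, so comparing the storage's
datanode descriptor against a particular datanode no longer identifies it; the
storage ID is the stable key. A toy illustration of the rule (hypothetical
types, not the patch's classes):

import java.util.Set;

class StorageMatchSketch {
  enum StorageType { PROVIDED, DISK }

  // True if a storage (type, storageId, owner) should be attributed to a
  // datanode that reported the storage IDs in dnStorageIds.
  static boolean matches(StorageType type, String storageId, Object owner,
      Object dnDescriptor, Set<String> dnStorageIds) {
    if (type == StorageType.PROVIDED) {
      // shared storage: any datanode that reported this storage ID matches
      return dnStorageIds.contains(storageId);
    }
    // local storage: owned by exactly one datanode descriptor
    return owner == dnDescriptor;
  }
}
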
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0e579b4b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index c5b9c8b..a3e30fe 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1424,6 +1424,7 @@ public class BlockManager implements BlockStatsMXBean {
    
   /** Remove the blocks associated to the given datanode. */
   void removeBlocksAssociatedTo(final DatanodeDescriptor node) {
+    providedStorageMap.removeDatanode(node);
     for (DatanodeStorageInfo storage : node.getStorageInfos()) {
       final Iterator<BlockInfo> it = storage.getBlockIterator();
       //add the BlockInfos to a new collection as the
@@ -2356,7 +2357,7 @@ public class BlockManager implements BlockStatsMXBean {
       // !#! Register DN with provided storage, not with storage owned by DN
       // !#! DN should still have a ref to the DNStorageInfo
       DatanodeStorageInfo storageInfo =
-          providedStorageMap.getStorage(node, storage);
+          providedStorageMap.getStorage(node, storage, context);
 
       if (storageInfo == null) {
         // We handle this for backwards compatibility.
@@ -2488,7 +2489,7 @@ public class BlockManager implements BlockStatsMXBean {
     }
   }
   
-  private Collection<Block> processReport(
+  Collection<Block> processReport(
       final DatanodeStorageInfo storageInfo,
       final BlockListAsLongs report,
       BlockReportContext context) throws IOException {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0e579b4b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockProvider.java
index d8bed16..2214868 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockProvider.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockProvider.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
 import java.io.IOException;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedStorageMap.ProvidedBlockList;
+import org.apache.hadoop.hdfs.server.protocol.BlockReportContext;
 import org.apache.hadoop.hdfs.util.RwLock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -52,14 +53,23 @@ public abstract class BlockProvider implements Iterable<Block> {
    * start the processing of block report for provided blocks.
    * @throws IOException
    */
-  void start() throws IOException {
+  void start(BlockReportContext context) throws IOException {
     assert lock.hasWriteLock() : "Not holding write lock";
     if (hasDNs) {
       return;
     }
-    LOG.info("Calling process first blk report from storage: " + storage);
-    // first pass; periodic refresh should call bm.processReport
-    bm.processFirstBlockReport(storage, new ProvidedBlockList(iterator()));
+    if (storage.getBlockReportCount() == 0) {
+      LOG.info("Calling process first blk report from storage: " + storage);
+      // first pass; periodic refresh should call bm.processReport
+      bm.processFirstBlockReport(storage, new ProvidedBlockList(iterator()));
+    } else {
+      bm.processReport(storage, new ProvidedBlockList(iterator()), context);
+    }
     hasDNs = true;
   }
+
+  void stop() {
+    assert lock.hasWriteLock() : "Not holding write lock";
+    hasDNs = false;
+  }
 }

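The start()/stop() pair above gives the provider a small lifecycle: stop() only
clears the hasDNs flag, so the next datanode registration re-runs a block
report, and the storage's report count decides whether that goes through the
first-report path or the regular reconciliation path. A simplified sketch of
the state machine (hypothetical, assuming all calls happen under the
namesystem write lock as the asserts require):

class ProviderLifecycleSketch {
  private boolean hasDNs = false;    // some DN currently backs the storage
  private int blockReportCount = 0;  // reports processed so far

  void start() {
    if (hasDNs) {
      return;                        // provider already running
    }
    if (blockReportCount == 0) {
      processFirstBlockReport();     // initial population of the block map
    } else {
      processReport();               // a DN re-registered after a failure
    }
    blockReportCount++;
    hasDNs = true;
  }

  void stop() {
    hasDNs = false;                  // force a report on the next start()
  }

  private void processFirstBlockReport() { /* bm.processFirstBlockReport */ }
  private void processReport() { /* bm.processReport */ }
}
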
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0e579b4b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
index 0faf16d..5717e0c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.protocol.BlockReportContext;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage.State;
 import org.apache.hadoop.hdfs.util.RwLock;
@@ -103,17 +104,18 @@ public class ProvidedStorageMap {
   /**
    * @param dn datanode descriptor
    * @param s data node storage
+   * @param context the block report context
    * @return the {@link DatanodeStorageInfo} for the specified datanode.
    * If {@code s} corresponds to a provided storage, the storage info
    * representing provided storage is returned.
    * @throws IOException
    */
-  DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s)
-      throws IOException {
+  DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
+      BlockReportContext context) throws IOException {
     if (providedEnabled && storageId.equals(s.getStorageID())) {
       if (StorageType.PROVIDED.equals(s.getStorageType())) {
         // poll service, initiate
-        blockProvider.start();
+        blockProvider.start(context);
         dn.injectStorage(providedStorageInfo);
         return providedDescriptor.getProvidedStorage(dn, s);
       }
@@ -134,6 +136,15 @@ public class ProvidedStorageMap {
     return new ProvidedBlocksBuilder(maxValue);
   }
 
+  public void removeDatanode(DatanodeDescriptor dnToRemove) {
+    if (providedDescriptor != null) {
+      int remainingDatanodes = providedDescriptor.remove(dnToRemove);
+      if (remainingDatanodes == 0) {
+        blockProvider.stop();
+      }
+    }
+  }
+
   /**
    * Builder used for creating {@link LocatedBlocks} when a block is provided.
    */
@@ -282,7 +293,7 @@ public class ProvidedStorageMap {
 
     DatanodeStorageInfo createProvidedStorage(DatanodeStorage ds) {
       assert null == storageMap.get(ds.getStorageID());
-      DatanodeStorageInfo storage = new DatanodeStorageInfo(this, ds);
+      DatanodeStorageInfo storage = new ProvidedDatanodeStorageInfo(this, ds);
       storage.setHeartbeatedSinceFailover(true);
       storageMap.put(storage.getStorageID(), storage);
       return storage;
@@ -381,6 +392,22 @@ public class ProvidedStorageMap {
       }
     }
 
+    int remove(DatanodeDescriptor dnToRemove) {
+      // this operation happens under the FSNamesystem lock;
+      // no additional synchronization required.
+      if (dnToRemove != null) {
+        DatanodeDescriptor storedDN = dns.get(dnToRemove.getDatanodeUuid());
+        if (storedDN != null) {
+          dns.remove(dnToRemove.getDatanodeUuid());
+        }
+      }
+      return dns.size();
+    }
+
+    int activeProvidedDatanodes() {
+      return dns.size();
+    }
+
     @Override
     public boolean equals(Object obj) {
       return (this == obj) || super.equals(obj);
@@ -393,6 +420,25 @@ public class ProvidedStorageMap {
   }
 
   /**
+   * The DatanodeStorageInfo used for the provided storage.
+   */
+  static class ProvidedDatanodeStorageInfo extends DatanodeStorageInfo {
+
+    ProvidedDatanodeStorageInfo(ProvidedDescriptor dn, DatanodeStorage ds) {
+      super(dn, ds);
+    }
+
+    @Override
+    boolean removeBlock(BlockInfo b) {
+      ProvidedDescriptor dn = (ProvidedDescriptor) getDatanodeDescriptor();
+      if (dn.activeProvidedDatanodes() == 0) {
+        return super.removeBlock(b);
+      } else {
+        return false;
+      }
+    }
+  }
+  /**
    * Used to emulate block reports for provided blocks.
    */
   static class ProvidedBlockList extends BlockListAsLongs {

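ProvidedDatanodeStorageInfo#removeBlock above is what keeps provided blocks
alive across individual failures: removal only falls through to the superclass
once no datanode is left serving the storage. A condensed sketch of the
override (hypothetical base class, mirroring the hunk):

class RemoveBlockSketch {
  interface Block {}

  static class StorageInfo {
    boolean removeBlock(Block b) {
      return true;                   // actually unlinks the block here
    }
  }

  static class ProvidedStorageInfo extends StorageInfo {
    int activeProvidedDatanodes;     // updated as provided DNs come and go

    @Override
    boolean removeBlock(Block b) {
      // keep the block mapped while any provided datanode remains
      return activeProvidedDatanodes == 0 && super.removeBlock(b);
    }
  }
}
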
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0e579b4b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestProvidedStorageMap.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestProvidedStorageMap.java
index 50e2fed..2296c82 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestProvidedStorageMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestProvidedStorageMap.java
@@ -119,9 +119,9 @@ public class TestProvidedStorageMap {
 
     when(nameSystemLock.hasWriteLock()).thenReturn(true);
     DatanodeStorageInfo dns1Provided = providedMap.getStorage(dn1,
-            dn1ProvidedStorage);
+            dn1ProvidedStorage, null);
     DatanodeStorageInfo dns1Disk = providedMap.getStorage(dn1,
-            dn1DiskStorage);
+            dn1DiskStorage, null);
 
     assertTrue("The provided storages should be equal",
             dns1Provided == providedMapStorage);
@@ -131,7 +131,7 @@ public class TestProvidedStorageMap {
     DatanodeStorageInfo dnsDisk = new DatanodeStorageInfo(dn1, dn1DiskStorage);
     dn1.injectStorage(dnsDisk);
     assertTrue("Disk storage must match the injected storage info",
-            dnsDisk == providedMap.getStorage(dn1, dn1DiskStorage));
+            dnsDisk == providedMap.getStorage(dn1, dn1DiskStorage, null));
 
     //create a 2nd datanode
     DatanodeDescriptor dn2 = createDatanodeDescriptor(5010);
@@ -142,12 +142,10 @@ public class TestProvidedStorageMap {
             StorageType.PROVIDED);
 
     DatanodeStorageInfo dns2Provided = providedMap.getStorage(
-            dn2, dn2ProvidedStorage);
+            dn2, dn2ProvidedStorage, null);
     assertTrue("The provided storages should be equal",
             dns2Provided == providedMapStorage);
     assertTrue("The DatanodeDescriptor should contain the provided storage",
             dn2.getStorageInfo(providedStorageID) == providedMapStorage);
-
-
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0e579b4b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
index e171557..60b306f 100644
--- a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
@@ -45,11 +45,14 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockFormatProvider;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockProvider;
 import org.apache.hadoop.hdfs.server.common.BlockFormat;
 import org.apache.hadoop.hdfs.server.common.FileRegionProvider;
 import org.apache.hadoop.hdfs.server.common.TextFileRegionFormat;
 import org.apache.hadoop.hdfs.server.common.TextFileRegionProvider;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
 
 import org.junit.After;
@@ -406,9 +409,9 @@ public class TestNameNodeProvidedImplementation {
     createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
         FixedBlockResolver.class);
     startCluster(NNDIRPATH, 2, null,
-        new StorageType[][] {
-                {StorageType.PROVIDED},
-                {StorageType.DISK}},
+        new StorageType[][]{
+            {StorageType.PROVIDED},
+            {StorageType.DISK}},
         false);
 
     String filename = "/" + filePrefix + (numFiles - 1) + fileSuffix;
@@ -433,4 +436,67 @@ public class TestNameNodeProvidedImplementation {
     assertEquals(cluster.getDataNodes().get(0).getDatanodeUuid(),
         infos[0].getDatanodeUuid());
   }
+
+  @Test
+  public void testProvidedDatanodeFailures() throws Exception {
+    createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
+            FixedBlockResolver.class);
+    startCluster(NNDIRPATH, 3, null,
+        new StorageType[][] {
+            {StorageType.PROVIDED},
+            {StorageType.PROVIDED},
+            {StorageType.DISK}},
+        false);
+
+    DataNode providedDatanode1 = cluster.getDataNodes().get(0);
+    DataNode providedDatanode2 = cluster.getDataNodes().get(1);
+
+    DFSClient client = new DFSClient(new InetSocketAddress("localhost",
+            cluster.getNameNodePort()), cluster.getConfiguration(0));
+
+    if (numFiles >= 1) {
+      String filename = "/" + filePrefix + (numFiles - 1) + fileSuffix;
+
+      DatanodeInfo[] dnInfos = getAndCheckBlockLocations(client, filename, 1);
+      //the location should be one of the provided DNs available
+      assertTrue(
+          dnInfos[0].getDatanodeUuid().equals(
+              providedDatanode1.getDatanodeUuid())
+          || dnInfos[0].getDatanodeUuid().equals(
+              providedDatanode2.getDatanodeUuid()));
+
+      //stop the 1st provided datanode
+      MiniDFSCluster.DataNodeProperties providedDNProperties1 =
+          cluster.stopDataNode(0);
+
+      //make NameNode detect that datanode is down
+      BlockManagerTestUtil.noticeDeadDatanode(
+          cluster.getNameNode(),
+          providedDatanode1.getDatanodeId().getXferAddr());
+
+      //should find the block on the 2nd provided datanode
+      dnInfos = getAndCheckBlockLocations(client, filename, 1);
+      assertEquals(providedDatanode2.getDatanodeUuid(),
+          dnInfos[0].getDatanodeUuid());
+
+      //stop the 2nd provided datanode
+      cluster.stopDataNode(1);
+      // make NameNode detect that datanode is down
+      BlockManagerTestUtil.noticeDeadDatanode(
+          cluster.getNameNode(),
+          providedDatanode2.getDatanodeId().getXferAddr());
+
+      getAndCheckBlockLocations(client, filename, 0);
+
+      //restart the provided datanode
+      cluster.restartDataNode(providedDNProperties1, true);
+      cluster.waitActive();
+
+      //should find the block on the 1st provided datanode now
+      dnInfos = getAndCheckBlockLocations(client, filename, 1);
+      //not comparing UUIDs as the datanode can now have a different one.
+      assertEquals(providedDatanode1.getDatanodeId().getXferAddr(),
+          dnInfos[0].getXferAddr());
+    }
+  }
 }

