Posted to common-commits@hadoop.apache.org by cm...@apache.org on 2015/06/02 20:41:16 UTC

hadoop git commit: HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P. McCabe)

Repository: hadoop
Updated Branches:
  refs/heads/trunk a2bd6217e -> 03fb5c642


HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P. McCabe)
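
In outline, the patch below makes two changes. In DataNode.java,
initDirectoryScanner(conf) is moved so that it runs only after
data.addBlockPool(...) and blockScanner.enableBlockPoolId(...). In
BlockPoolSlice.java, the replica-selection logic is factored out of
resolveDuplicateReplicas(...) into a static selectReplicaToDelete(...)
helper that returns null when both ReplicaInfo arguments point to the
same block file on disk, so "resolving" a duplicate can never delete the
only physical copy of a block. A unit test covering the new helper is
added in TestFsDatasetImpl.java.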


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/03fb5c64
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/03fb5c64
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/03fb5c64

Branch: refs/heads/trunk
Commit: 03fb5c642589dec4e663479771d0ae1782038b63
Parents: a2bd621
Author: Colin Patrick Mccabe <cm...@cloudera.com>
Authored: Tue Jun 2 11:40:37 2015 -0700
Committer: Colin Patrick Mccabe <cm...@cloudera.com>
Committed: Tue Jun 2 11:40:37 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hadoop/hdfs/server/datanode/DataNode.java   |  2 +-
 .../datanode/fsdataset/impl/BlockPoolSlice.java | 28 ++++++++++++----
 .../fsdataset/impl/TestFsDatasetImpl.java       | 34 ++++++++++++++++++++
 4 files changed, 60 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fb5c64/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index fa28a16..0822f90 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -924,6 +924,9 @@ Release 2.7.1 - UNRELEASED
     HDFS-8451. DFSClient probe for encryption testing interprets empty URI
     property for "enabled". (Steve Loughran via xyao)
 
+    HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P.
+    McCabe)
+
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fb5c64/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index d2b2939..f73eb66 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -1370,9 +1370,9 @@ public class DataNode extends ReconfigurableBase
     // failures.
     checkDiskError();
 
-    initDirectoryScanner(conf);
     data.addBlockPool(nsInfo.getBlockPoolID(), conf);
     blockScanner.enableBlockPoolId(bpos.getBlockPoolId());
+    initDirectoryScanner(conf);
   }
 
   List<BPOfferService> getAllBpOs() {
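
The reordering above is the DataNode-side half of the fix: the directory
scanner is now initialized only after the block pool has been added and
the block scanner enabled for it. Presumably this keeps the scanner from
running during the startup window in which the pool's replicas are not
yet loaded into the volume map.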

http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fb5c64/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
index 951c759..94aaf21 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
@@ -550,10 +550,28 @@ class BlockPoolSlice {
       // Leave both block replicas in place.
       return replica1;
     }
+    final ReplicaInfo replicaToDelete =
+        selectReplicaToDelete(replica1, replica2);
+    final ReplicaInfo replicaToKeep =
+        (replicaToDelete != replica1) ? replica1 : replica2;
+    // Update volumeMap and delete the replica
+    volumeMap.add(bpid, replicaToKeep);
+    if (replicaToDelete != null) {
+      deleteReplica(replicaToDelete);
+    }
+    return replicaToKeep;
+  }
 
+  static ReplicaInfo selectReplicaToDelete(final ReplicaInfo replica1,
+      final ReplicaInfo replica2) {
     ReplicaInfo replicaToKeep;
     ReplicaInfo replicaToDelete;
 
+    // it's the same block so don't ever delete it, even if GS or size
+    // differs.  caller should keep the one it just discovered on disk
+    if (replica1.getBlockFile().equals(replica2.getBlockFile())) {
+      return null;
+    }
     if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) {
       replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp()
           ? replica1 : replica2;
@@ -573,10 +591,10 @@ class BlockPoolSlice {
       LOG.debug("resolveDuplicateReplicas decide to keep " + replicaToKeep
           + ".  Will try to delete " + replicaToDelete);
     }
+    return replicaToDelete;
+  }
 
-    // Update volumeMap.
-    volumeMap.add(bpid, replicaToKeep);
-
+  private void deleteReplica(final ReplicaInfo replicaToDelete) {
     // Delete the files on disk. Failure here is okay.
     final File blockFile = replicaToDelete.getBlockFile();
     if (!blockFile.delete()) {
@@ -586,10 +604,8 @@ class BlockPoolSlice {
     if (!metaFile.delete()) {
       LOG.warn("Failed to delete meta file " + metaFile);
     }
-
-    return replicaToKeep;
   }
-  
+
   /**
    * Find out the number of bytes in the block that match its crc.
    * 
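
To make the hazard concrete, here is a minimal, self-contained sketch of
the guard introduced above. It is not Hadoop code: the Replica class and
its fields are stand-ins invented for illustration. Without the
same-file check, two replica records that happen to describe the same
on-disk file would be compared by generation stamp and one of them
"deleted", unlinking the only physical copy of the block.

    import java.io.File;

    public class DuplicateReplicaSketch {
      // Stand-in for ReplicaInfo: just a block file and a generation stamp.
      static final class Replica {
        final File blockFile;
        final long genStamp;
        Replica(File blockFile, long genStamp) {
          this.blockFile = blockFile;
          this.genStamp = genStamp;
        }
      }

      // Mirrors the patched logic: if both records resolve to the same
      // on-disk file there is nothing to delete, since removing "the
      // duplicate" would remove the only copy of the block.
      static Replica selectReplicaToDelete(Replica r1, Replica r2) {
        if (r1.blockFile.equals(r2.blockFile)) {
          return null;  // same physical file; never delete it
        }
        // Otherwise prefer the newer generation stamp (the real code
        // applies further tie-breakers; simplified here).
        return r1.genStamp >= r2.genStamp ? r2 : r1;
      }

      public static void main(String[] args) {
        File f = new File("d1/block");
        Replica a = new Replica(f, 1);
        Replica b = new Replica(f, 2);  // same file rediscovered at startup
        // Before the fix, stamp comparison alone would have picked one of
        // these for deletion and unlinked the shared file.
        System.out.println(selectReplicaToDelete(a, b));  // prints "null"
      }
    }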

http://git-wip-us.apache.org/repos/asf/hadoop/blob/03fb5c64/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 9f4f700..59c7ade 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.util.StringUtils;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Matchers;
+import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -66,6 +67,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOUR
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyListOf;
@@ -411,4 +414,35 @@ public class TestFsDatasetImpl {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void testDuplicateReplicaResolution() throws IOException {
+    FsVolumeImpl fsv1 = Mockito.mock(FsVolumeImpl.class);
+    FsVolumeImpl fsv2 = Mockito.mock(FsVolumeImpl.class);
+
+    File f1 = new File("d1/block");
+    File f2 = new File("d2/block");
+
+    ReplicaInfo replicaOlder = new FinalizedReplica(1,1,1,fsv1,f1);
+    ReplicaInfo replica = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaSame = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaNewer = new FinalizedReplica(1,3,3,fsv1,f1);
+
+    ReplicaInfo replicaOtherOlder = new FinalizedReplica(1,1,1,fsv2,f2);
+    ReplicaInfo replicaOtherSame = new FinalizedReplica(1,2,2,fsv2,f2);
+    ReplicaInfo replicaOtherNewer = new FinalizedReplica(1,3,3,fsv2,f2);
+
+    // equivalent path so don't remove either
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaSame, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaOlder, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaNewer, replica));
+
+    // keep latest found replica
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherSame, replica));
+    assertSame(replicaOtherOlder,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherOlder, replica));
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherNewer, replica));
+  }
 }
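
The new test calls selectReplicaToDelete(...) directly: the three
assertNull checks confirm that two ReplicaInfo objects sharing a block
file path never yield a deletion candidate, whatever their generation
stamps, while the assertSame checks confirm that replicas on distinct
paths still resolve as before, with the newer generation stamp winning
and, per the test's comment, the latest-found replica kept on a tie.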