Posted to common-commits@hadoop.apache.org by xi...@apache.org on 2017/09/07 23:30:14 UTC

hadoop git commit: HDFS-12369. Edit log corruption due to hard lease recovery of not-closed file which has snapshots.

Repository: hadoop
Updated Branches:
  refs/heads/trunk b0b535d9d -> 52b894db3


HDFS-12369. Edit log corruption due to hard lease recovery of not-closed file which has snapshots.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/52b894db
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/52b894db
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/52b894db

Branch: refs/heads/trunk
Commit: 52b894db33bc68b46eec5cdf2735dfcf4030853a
Parents: b0b535d
Author: Xiao Chen <xi...@apache.org>
Authored: Thu Sep 7 15:54:52 2017 -0700
Committer: Xiao Chen <xi...@apache.org>
Committed: Thu Sep 7 16:30:12 2017 -0700

----------------------------------------------------------------------
 .../hdfs/server/namenode/FSNamesystem.java      |  1 +
 .../hdfs/server/namenode/LeaseManager.java      |  6 ++
 .../hdfs/server/namenode/TestDeleteRace.java    | 84 ++++++++++++++++++++
 3 files changed, 91 insertions(+)
----------------------------------------------------------------------
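
The failure mode, in short: a file is written but never closed, a snapshot
captures it, and the file is then deleted. Because the snapshot keeps the
inode alive, the lease on the open file survives the delete. When that lease
later reaches the hard limit, the LeaseManager's recovery path tried to close
a file that no longer exists in the namespace, writing an edit-log record
that could not be replayed. A minimal sketch of the triggering sequence
(client-side, names illustrative; the full version is in the new test below):

    DistributedFileSystem fs = cluster.getFileSystem();
    FSDataOutputStream stm = fs.create(new Path("/testfile"));
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);   // write, but do not close
    stm.hflush();
    SnapshotTestHelper.createSnapshot(fs, new Path("/"), "snap");
    fs.delete(new Path("/testfile"), false);        // snapshot keeps the inode
    // ... let the lease reach its hard limit; before this fix the NameNode's
    // recovery of the deleted-but-snapshotted file corrupted the edit log.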


http://git-wip-us.apache.org/repos/asf/hadoop/blob/52b894db/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index c30999b..dedb11e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4993,6 +4993,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   }
 
   boolean isFileDeleted(INodeFile file) {
+    assert hasReadLock();
     // Not in the inodeMap or in the snapshot but marked deleted.
     if (dir.getInode(file.getId()) == null) {
       return true;
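
The one-line change above documents a locking contract rather than changing
behavior: isFileDeleted() reads the inode map, so it must only be called with
the FSNamesystem read (or write) lock held. A sketch of the expected caller
pattern (readLock()/readUnlock() are the standard FSNamesystem methods; the
variable name is illustrative):

    fsn.readLock();
    try {
      if (fsn.isFileDeleted(file)) {
        // Safe: the inode map cannot change while the read lock is held.
      }
    } finally {
      fsn.readUnlock();
    }

Note that the assertion only fires when the JVM runs with assertions enabled,
as Maven Surefire does by default for the test suites.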

http://git-wip-us.apache.org/repos/asf/hadoop/blob/52b894db/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
index 35ec063..45699cb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
@@ -557,6 +557,12 @@ public class LeaseManager {
           if (!p.startsWith("/")) {
             throw new IOException("Invalid path in the lease " + p);
           }
+          final INodeFile lastINode = iip.getLastINode().asFile();
+          if (fsnamesystem.isFileDeleted(lastINode)) {
+            // INode referred by the lease could have been deleted.
+            removeLease(lastINode.getId());
+            continue;
+          }
           boolean completed = false;
           try {
             completed = fsnamesystem.internalReleaseLease(
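
Why the guard is needed: with a snapshot pinning the inode, the lease's path
still resolves after the delete, so the old code went on to call
internalReleaseLease() on a deleted file and logged a close record that could
not be replayed. Dropping the lease is the correct action instead. The new
isFileDeleted() call is also consistent with the assertion added above, since
the lease monitor invokes checkLeases() under the FSNamesystem write lock,
roughly like this (a sketch; the actual monitor loop also handles
interruption and edit-log syncing):

    fsnamesystem.writeLock();
    try {
      boolean needSync = leaseManager.checkLeases();
    } finally {
      fsnamesystem.writeUnlock();
    }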

http://git-wip-us.apache.org/repos/asf/hadoop/blob/52b894db/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
index 133a18e..a13574f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
@@ -20,11 +20,13 @@ package org.apache.hadoop.hdfs.server.namenode;
 import java.io.FileNotFoundException;
 import java.util.AbstractMap;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -49,16 +51,21 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils;
+import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.Node;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.junit.Assert;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 import org.mockito.Mockito;
 import org.mockito.internal.util.reflection.Whitebox;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
 
 /**
  * Test race between delete and other operations.  For now only addBlock()
@@ -71,6 +78,9 @@ public class TestDeleteRace {
   private static final Configuration conf = new HdfsConfiguration();
   private MiniDFSCluster cluster;
 
+  @Rule
+  public Timeout timeout = new Timeout(60000 * 3);
+
   @Test  
   public void testDeleteAddBlockRace() throws Exception {
     testDeleteAddBlockRace(false);
@@ -358,4 +368,78 @@ public class TestDeleteRace {
       throws Exception {
     testDeleteAndCommitBlockSynchronizationRace(true);
   }
+
+
+  /**
+   * Test the sequence of deleting a file that has snapshot,
+   * and lease manager's hard limit recovery.
+   */
+  @Test
+  public void testDeleteAndLeaseRecoveryHardLimitSnapshot() throws Exception {
+    final Path rootPath = new Path("/");
+    final Configuration config = new Configuration();
+    // Disable permissions so that another user can recover the lease.
+    config.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
+    config.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
+    FSDataOutputStream stm = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(config).numDataNodes(3).build();
+      cluster.waitActive();
+
+      final DistributedFileSystem fs = cluster.getFileSystem();
+      final Path testPath = new Path("/testfile");
+      stm = fs.create(testPath);
+      LOG.info("test on " + testPath);
+
+      // write a half block
+      AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
+      stm.hflush();
+
+      // create a snapshot, so delete does not release the file's inode.
+      SnapshotTestHelper.createSnapshot(fs, rootPath, "snap");
+
+      // delete the file without closing it.
+      fs.delete(testPath, false);
+
+      // write enough bytes to trigger an addBlock, which would fail in
+      // the streamer.
+      AppendTestUtil.write(stm, 0, BLOCK_SIZE);
+
+      // Mock a scenario that the lease reached hard limit.
+      final LeaseManager lm = (LeaseManager) Whitebox
+          .getInternalState(cluster.getNameNode().getNamesystem(),
+              "leaseManager");
+      final TreeSet<Lease> leases =
+          (TreeSet<Lease>) Whitebox.getInternalState(lm, "sortedLeases");
+      final TreeSet<Lease> spyLeases = new TreeSet<>(new Comparator<Lease>() {
+        @Override
+        public int compare(Lease o1, Lease o2) {
+          return Long.signum(o1.getLastUpdate() - o2.getLastUpdate());
+        }
+      });
+      while (!leases.isEmpty()) {
+        final Lease lease = leases.first();
+        final Lease spyLease = Mockito.spy(lease);
+        Mockito.doReturn(true).when(spyLease).expiredHardLimit();
+        spyLeases.add(spyLease);
+        leases.remove(lease);
+      }
+      Whitebox.setInternalState(lm, "sortedLeases", spyLeases);
+
+      // wait for lease manager's background 'Monitor' class to check leases.
+      Thread.sleep(2 * conf.getLong(DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
+          DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT));
+
+      LOG.info("Now check we can restart");
+      cluster.restartNameNodes();
+      LOG.info("Restart finished");
+    } finally {
+      if (stm != null) {
+        IOUtils.closeStream(stm);
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
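
To run just the new regression test locally, something like the following
should work from a checkout (a standard Maven Surefire invocation, assuming
the tree builds):

    cd hadoop-hdfs-project/hadoop-hdfs
    mvn test -Dtest=TestDeleteRace#testDeleteAndLeaseRecoveryHardLimitSnapshot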

