You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xi...@apache.org on 2017/09/07 23:30:14 UTC
hadoop git commit: HDFS-12369. Edit log corruption due to hard lease
recovery of not-closed file which has snapshots.
Repository: hadoop
Updated Branches:
refs/heads/trunk b0b535d9d -> 52b894db3
HDFS-12369. Edit log corruption due to hard lease recovery of not-closed file which has snapshots.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/52b894db
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/52b894db
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/52b894db
Branch: refs/heads/trunk
Commit: 52b894db33bc68b46eec5cdf2735dfcf4030853a
Parents: b0b535d
Author: Xiao Chen <xi...@apache.org>
Authored: Thu Sep 7 15:54:52 2017 -0700
Committer: Xiao Chen <xi...@apache.org>
Committed: Thu Sep 7 16:30:12 2017 -0700
----------------------------------------------------------------------
.../hdfs/server/namenode/FSNamesystem.java | 1 +
.../hdfs/server/namenode/LeaseManager.java | 6 ++
.../hdfs/server/namenode/TestDeleteRace.java | 84 ++++++++++++++++++++
3 files changed, 91 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/52b894db/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index c30999b..dedb11e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4993,6 +4993,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
}
boolean isFileDeleted(INodeFile file) {
+ assert hasReadLock();
// Not in the inodeMap or in the snapshot but marked deleted.
if (dir.getInode(file.getId()) == null) {
return true;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/52b894db/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
index 35ec063..45699cb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
@@ -557,6 +557,12 @@ public class LeaseManager {
if (!p.startsWith("/")) {
throw new IOException("Invalid path in the lease " + p);
}
+ final INodeFile lastINode = iip.getLastINode().asFile();
+ if (fsnamesystem.isFileDeleted(lastINode)) {
+ // INode referred by the lease could have been deleted.
+ removeLease(lastINode.getId());
+ continue;
+ }
boolean completed = false;
try {
completed = fsnamesystem.internalReleaseLease(
http://git-wip-us.apache.org/repos/asf/hadoop/blob/52b894db/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
index 133a18e..a13574f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
@@ -20,11 +20,13 @@ package org.apache.hadoop.hdfs.server.namenode;
import java.io.FileNotFoundException;
import java.util.AbstractMap;
import java.util.ArrayList;
+import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -49,16 +51,21 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils;
+import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.junit.Assert;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.Timeout;
import org.mockito.Mockito;
import org.mockito.internal.util.reflection.Whitebox;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
/**
* Test race between delete and other operations. For now only addBlock()
@@ -71,6 +78,9 @@ public class TestDeleteRace {
private static final Configuration conf = new HdfsConfiguration();
private MiniDFSCluster cluster;
+ @Rule
+ public Timeout timeout = new Timeout(60000 * 3);
+
@Test
public void testDeleteAddBlockRace() throws Exception {
testDeleteAddBlockRace(false);
@@ -358,4 +368,78 @@ public class TestDeleteRace {
throws Exception {
testDeleteAndCommitBlockSynchronizationRace(true);
}
+
+
+ /**
+ * Test the sequence of deleting a file that has snapshot,
+ * and lease manager's hard limit recovery.
+ */
+ @Test
+ public void testDeleteAndLeaseRecoveryHardLimitSnapshot() throws Exception {
+ final Path rootPath = new Path("/");
+ final Configuration config = new Configuration();
+ // Disable permissions so that another user can recover the lease.
+ config.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
+ config.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
+ FSDataOutputStream stm = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(config).numDataNodes(3).build();
+ cluster.waitActive();
+
+ final DistributedFileSystem fs = cluster.getFileSystem();
+ final Path testPath = new Path("/testfile");
+ stm = fs.create(testPath);
+ LOG.info("test on " + testPath);
+
+ // write a half block
+ AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
+ stm.hflush();
+
+ // create a snapshot, so delete does not release the file's inode.
+ SnapshotTestHelper.createSnapshot(fs, rootPath, "snap");
+
+ // delete the file without closing it.
+ fs.delete(testPath, false);
+
+ // write enough bytes to trigger an addBlock, which would fail in
+ // the streamer.
+ AppendTestUtil.write(stm, 0, BLOCK_SIZE);
+
+ // Mock a scenario that the lease reached hard limit.
+ final LeaseManager lm = (LeaseManager) Whitebox
+ .getInternalState(cluster.getNameNode().getNamesystem(),
+ "leaseManager");
+ final TreeSet<Lease> leases =
+ (TreeSet<Lease>) Whitebox.getInternalState(lm, "sortedLeases");
+ final TreeSet<Lease> spyLeases = new TreeSet<>(new Comparator<Lease>() {
+ @Override
+ public int compare(Lease o1, Lease o2) {
+ return Long.signum(o1.getLastUpdate() - o2.getLastUpdate());
+ }
+ });
+ while (!leases.isEmpty()) {
+ final Lease lease = leases.first();
+ final Lease spyLease = Mockito.spy(lease);
+ Mockito.doReturn(true).when(spyLease).expiredHardLimit();
+ spyLeases.add(spyLease);
+ leases.remove(lease);
+ }
+ Whitebox.setInternalState(lm, "sortedLeases", spyLeases);
+
+ // wait for lease manager's background 'Monitor' class to check leases.
+ Thread.sleep(2 * conf.getLong(DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
+ DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT));
+
+ LOG.info("Now check we can restart");
+ cluster.restartNameNodes();
+ LOG.info("Restart finished");
+ } finally {
+ if (stm != null) {
+ IOUtils.closeStream(stm);
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org