You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by he...@apache.org on 2020/07/13 06:22:13 UTC
[hadoop] branch trunk updated: HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.
This is an automated email from the ASF dual-hosted git repository.
hexiaoqiao pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new b97fea6 HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.
b97fea6 is described below
commit b97fea65e70bee4f5ea81c544396f8e9fa860ab0
Author: He Xiaoqiao <he...@apache.org>
AuthorDate: Mon Jul 13 14:12:48 2020 +0800
HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.
---
.../hadoop/hdfs/server/namenode/FSNamesystem.java | 11 +++
.../org/apache/hadoop/hdfs/TestLeaseRecovery.java | 107 +++++++++++++++++++++
2 files changed, 118 insertions(+)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 5e50b58..993c283 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3677,6 +3677,17 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
" internalReleaseLease: Committed blocks are minimally" +
" replicated, lease removed, file" + src + " closed.");
return true; // closed!
+ } else if (penultimateBlockMinStorage && lastBlock.getNumBytes() == 0) {
+ // HDFS-14498 - this is a file with a final block of zero bytes and was
+ // likely left in this state by a client which exited unexpectedly
+ pendingFile.removeLastBlock(lastBlock);
+ finalizeINodeFileUnderConstruction(src, pendingFile,
+ iip.getLatestSnapshotId(), false);
+ NameNode.stateChangeLog.warn("BLOCK*" +
+ " internalReleaseLease: Committed last block is zero bytes with" +
+ " insufficient replicas. Final block removed, lease removed, file "
+ + src + " closed.");
+ return true;
}
// Cannot close file right now, since some blocks
// are not yet minimally replicated.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index c82b47c..a1cce3e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -24,15 +24,18 @@ import java.io.IOException;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.crypto.CryptoProtocolVersion;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -43,6 +46,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Test;
@@ -314,4 +318,107 @@ public class TestLeaseRecovery {
}
}
}
+
+ /**
+ * HDFS-14498 - test lease can be recovered for a file where the final
+ * block was never registered with the DNs, and hence the IBRs will never
+ * be received. In this case the final block should be zero bytes and can
+ * be removed.
+ */
+ @Test
+ public void testLeaseRecoveryEmptyCommittedLastBlock() throws Exception {
+ Configuration conf = new Configuration();
+ DFSClient client = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+ DistributedFileSystem dfs = cluster.getFileSystem();
+ client =
+ new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf);
+ String file = "/test/f1";
+ Path filePath = new Path(file);
+
+ createCommittedNotCompleteFile(client, file);
+
+ // Ensure a different client cannot append the file
+ try {
+ dfs.append(filePath);
+ fail("Append to a file(lease is held by another client) should fail");
+ } catch (RemoteException e) {
+ assertTrue(e.getMessage().contains("file lease is currently owned"));
+ }
+
+ // Ensure the lease can be recovered on the first try
+ boolean recovered = client.recoverLease(file);
+ assertEquals(true, recovered);
+
+ // Ensure the recovered file can now be written
+ FSDataOutputStream append = dfs.append(filePath);
+ append.write("test".getBytes());
+ append.close();
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ cluster = null;
+ }
+ if (client != null) {
+ client.close();
+ }
+ }
+ }
+
+ /**
+ * HDFS-14498 - similar to testLeaseRecoveryEmptyCommittedLastBlock except
+ * we wait for the lease manager to recover the lease automatically.
+ */
+ @Test
+ public void testLeaseManagerRecoversEmptyCommittedLastBlock()
+ throws Exception {
+ Configuration conf = new Configuration();
+ DFSClient client = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+ client =
+ new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf);
+ String file = "/test/f1";
+
+ createCommittedNotCompleteFile(client, file);
+ waitLeaseRecovery(cluster);
+
+ GenericTestUtils.waitFor(() -> {
+ String holder = NameNodeAdapter
+ .getLeaseHolderForPath(cluster.getNameNode(), file);
+ return holder == null;
+ }, 100, 10000);
+
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ cluster = null;
+ }
+ if (client != null) {
+ client.close();
+ }
+ }
+ }
+
+ private void createCommittedNotCompleteFile(DFSClient client, String file)
+ throws IOException {
+ HdfsFileStatus stat = client.getNamenode()
+ .create(file, new FsPermission("777"), "test client",
+ new EnumSetWritable<CreateFlag>(EnumSet.of(CreateFlag.CREATE)),
+ true, (short) 1, 1024 * 1024 * 128L,
+ new CryptoProtocolVersion[0], null, null);
+ // Add a block to the file
+ LocatedBlock blk = client.getNamenode()
+ .addBlock(file, "test client", null,
+ new DatanodeInfo[0], stat.getFileId(), new String[0], null);
+ // Without writing anything to the file, or setting up the DN pipeline
+ // attempt to close the file. This will fail (return false) as the NN will
+ // be expecting the registered block to be reported from the DNs via IBR,
+ // but that will never happen, as the pipeline was never established
+ boolean closed = client.getNamenode().complete(
+ file, "test client", blk.getBlock(), stat.getFileId());
+ assertEquals(false, closed);
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org