You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by he...@apache.org on 2020/07/13 06:41:59 UTC

[hadoop] branch branch-3.1 updated: HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.

This is an automated email from the ASF dual-hosted git repository.

hexiaoqiao pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 0bfd1b5  HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.
0bfd1b5 is described below

commit 0bfd1b5a945ec341a36678aa3c5318f1598fc716
Author: He Xiaoqiao <he...@apache.org>
AuthorDate: Mon Jul 13 14:12:48 2020 +0800

    HDFS-14498 LeaseManager can loop forever on the file for which create has failed. Contributed by Stephen O'Donnell.
---
 .../hadoop/hdfs/server/namenode/FSNamesystem.java  |  11 +++
 .../org/apache/hadoop/hdfs/TestLeaseRecovery.java  | 107 +++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 28be228..ddecab8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3421,6 +3421,17 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
             " internalReleaseLease: Committed blocks are minimally" +
             " replicated, lease removed, file" + src + " closed.");
         return true;  // closed!
+      } else if (penultimateBlockMinStorage && lastBlock.getNumBytes() == 0) {
+        // HDFS-14498 - this is a file with a final block of zero bytes and was
+        // likely left in this state by a client which exited unexpectedly
+        pendingFile.removeLastBlock(lastBlock);
+        finalizeINodeFileUnderConstruction(src, pendingFile,
+            iip.getLatestSnapshotId(), false);
+        NameNode.stateChangeLog.warn("BLOCK*" +
+            " internalReleaseLease: Committed last block is zero bytes with" +
+            " insufficient replicas. Final block removed, lease removed, file "
+            + src + " closed.");
+        return true;
       }
       // Cannot close file right now, since some blocks 
       // are not yet minimally replicated.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index c82b47c..a1cce3e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -24,15 +24,18 @@ import java.io.IOException;
 import java.util.EnumSet;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.crypto.CryptoProtocolVersion;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -43,6 +46,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.io.EnumSetWritable;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Test;
 
@@ -314,4 +318,107 @@ public class TestLeaseRecovery {
       }
     }
   }
+
+  /**
+   *  HDFS-14498 - test lease can be recovered for a file where the final
+   *  block was never registered with the DNs, and hence the IBRs will never
+   *  be received. In this case the final block should be zero bytes and can
+   *  be removed.
+   */
+  @Test
+  public void testLeaseRecoveryEmptyCommittedLastBlock() throws Exception {
+    Configuration conf = new Configuration();
+    DFSClient client = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      DistributedFileSystem dfs = cluster.getFileSystem();
+      client =
+          new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf);
+      String file = "/test/f1";
+      Path filePath = new Path(file);
+
+      createCommittedNotCompleteFile(client, file);
+
+      // Ensure a different client cannot append the file
+      try {
+        dfs.append(filePath);
+        fail("Append to a file(lease is held by another client) should fail");
+      } catch (RemoteException e) {
+        assertTrue(e.getMessage().contains("file lease is currently owned"));
+      }
+
+      // Ensure the lease can be recovered on the first try
+      boolean recovered = client.recoverLease(file);
+      assertEquals(true, recovered);
+
+      // Ensure the recovered file can now be written
+      FSDataOutputStream append = dfs.append(filePath);
+      append.write("test".getBytes());
+      append.close();
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+        cluster = null;
+      }
+      if (client != null) {
+        client.close();
+      }
+    }
+  }
+
+  /**
+   *  HDFS-14498 - similar to testLeaseRecoveryEmptyCommittedLastBlock except
+   *  we wait for the lease manager to recover the lease automatically.
+   */
+  @Test
+  public void testLeaseManagerRecoversEmptyCommittedLastBlock()
+      throws Exception {
+    Configuration conf = new Configuration();
+    DFSClient client = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      client =
+          new DFSClient(cluster.getNameNode().getServiceRpcAddress(), conf);
+      String file = "/test/f1";
+
+      createCommittedNotCompleteFile(client, file);
+      waitLeaseRecovery(cluster);
+
+      GenericTestUtils.waitFor(() -> {
+        String holder = NameNodeAdapter
+            .getLeaseHolderForPath(cluster.getNameNode(), file);
+        return holder == null;
+      }, 100, 10000);
+
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+        cluster = null;
+      }
+      if (client != null) {
+        client.close();
+      }
+    }
+  }
+
+  private void createCommittedNotCompleteFile(DFSClient client, String file)
+      throws IOException {
+    HdfsFileStatus stat = client.getNamenode()
+        .create(file, new FsPermission("777"), "test client",
+            new EnumSetWritable<CreateFlag>(EnumSet.of(CreateFlag.CREATE)),
+            true, (short) 1, 1024 * 1024 * 128L,
+            new CryptoProtocolVersion[0], null, null);
+    // Add a block to the file
+    LocatedBlock blk = client.getNamenode()
+        .addBlock(file, "test client", null,
+            new DatanodeInfo[0], stat.getFileId(), new String[0], null);
+    // Without writing anything to the file, or setting up the DN pipeline
+    // attempt to close the file. This will fail (return false) as the NN will
+    // be expecting the registered block to be reported from the DNs via IBR,
+    // but that will never happen, as the pipeline was never established
+    boolean closed = client.getNamenode().complete(
+        file, "test client", blk.getBlock(), stat.getFileId());
+    assertEquals(false, closed);
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org