You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by st...@apache.org on 2011/06/07 19:44:27 UTC
svn commit: r1133110 - in /hadoop/hdfs/branches/branch-0.22: ./
src/java/org/apache/hadoop/hdfs/ src/java/org/apache/hadoop/hdfs/protocol/
src/java/org/apache/hadoop/hdfs/server/namenode/
src/test/hdfs/org/apache/hadoop/hdfs/
Author: stack
Date: Tue Jun 7 17:44:27 2011
New Revision: 1133110
URL: http://svn.apache.org/viewvc?rev=1133110&view=rev
Log:
HDFS-1948 Forward port 'hdfs-1520 lightweight namenode operation to trigger lease recovery'
Modified:
hadoop/hdfs/branches/branch-0.22/CHANGES.txt
hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DFSClient.java
hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
hadoop/hdfs/branches/branch-0.22/src/test/hdfs/org/apache/hadoop/hdfs/TestLeaseRecovery2.java
Modified: hadoop/hdfs/branches/branch-0.22/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/CHANGES.txt?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/CHANGES.txt (original)
+++ hadoop/hdfs/branches/branch-0.22/CHANGES.txt Tue Jun 7 17:44:27 2011
@@ -257,6 +257,9 @@ Release 0.22.0 - Unreleased
HDFS-1619. Remove AC_TYPE* from the libhdfs. (Roman Shaposhnik via eli)
+ HDFS-1948. Forward port 'hdfs-1520 lightweight namenode operation to
+ trigger lease recovery'. (stack)
+
OPTIMIZATIONS
HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv)
Modified: hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DFSClient.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DFSClient.java?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DFSClient.java (original)
+++ hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DFSClient.java Tue Jun 7 17:44:27 2011
@@ -462,6 +462,23 @@ public class DFSClient implements FSCons
}
/**
+ * Recover a file's lease
+ * @param src a file's path
+ * @return true if the file is already closed
+ * @throws IOException
+ */
+ boolean recoverLease(String src) throws IOException {
+ checkOpen();
+
+ try {
+ return namenode.recoverLease(src, clientName);
+ } catch (RemoteException re) {
+ throw re.unwrapRemoteException(FileNotFoundException.class,
+ AccessControlException.class);
+ }
+ }
+
+ /**
* Get block location info about file
*
* getBlockLocations() returns a list of hostnames that store
Modified: hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DistributedFileSystem.java?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DistributedFileSystem.java (original)
+++ hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/DistributedFileSystem.java Tue Jun 7 17:44:27 2011
@@ -217,6 +217,17 @@ public class DistributedFileSystem exten
this.verifyChecksum = verifyChecksum;
}
+ /**
+ * Start the lease recovery of a file
+ *
+ * @param f a file
+ * @return true if the file is already closed
+ * @throws IOException if an error occurs
+ */
+ public boolean recoverLease(Path f) throws IOException {
+ return dfs.recoverLease(getPathName(f));
+ }
+
@SuppressWarnings("deprecation")
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
Modified: hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java (original)
+++ hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java Tue Jun 7 17:44:27 2011
@@ -67,9 +67,9 @@ public interface ClientProtocol extends
* Compared to the previous version the following changes have been introduced:
* (Only the latest change is reflected.
* The log of historical changes can be retrieved from the svn).
- * 64: Remove ClientProtocol changes related to -list-corruptfiles
+ * 65: recoverLease returns whether the file is closed or not
*/
- public static final long versionID = 64L;
+ public static final long versionID = 65L;
///////////////////////////////////////
// File contents
@@ -513,6 +513,17 @@ public interface ClientProtocol extends
public void renewLease(String clientName) throws AccessControlException,
IOException;
+ /**
+ * Start lease recovery.
+ * Lightweight NameNode operation to trigger lease recovery
+ *
+ * @param src path of the file to start lease recovery
+ * @param clientName name of the current client
+ * @return true if the file is already closed
+ * @throws IOException
+ */
+ public boolean recoverLease(String src, String clientName) throws IOException;
+
public int GET_STATS_CAPACITY_IDX = 0;
public int GET_STATS_USED_IDX = 1;
public int GET_STATS_REMAINING_IDX = 2;
Modified: hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Tue Jun 7 17:44:27 2011
@@ -1335,62 +1335,7 @@ public class FSNamesystem implements FSC
try {
INode myFile = dir.getFileINode(src);
- if (myFile != null && myFile.isUnderConstruction()) {
- INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction) myFile;
- //
- // If the file is under construction , then it must be in our
- // leases. Find the appropriate lease record.
- //
- Lease lease = leaseManager.getLeaseByPath(src);
- if (lease == null) {
- throw new AlreadyBeingCreatedException(
- "failed to create file " + src + " for " + holder +
- " on client " + clientMachine +
- " because pendingCreates is non-null but no leases found.");
- }
- //
- // We found the lease for this file. And surprisingly the original
- // holder is trying to recreate this file. This should never occur.
- //
- if (lease.getHolder().equals(holder)) {
- throw new AlreadyBeingCreatedException(
- "failed to create file " + src + " for " + holder +
- " on client " + clientMachine +
- " because current leaseholder is trying to recreate file.");
- }
- assert lease.getHolder().equals(pendingFile.getClientName()) :
- "Current lease holder " + lease.getHolder() +
- " does not match file creator " + pendingFile.getClientName();
- //
- // Current lease holder is different from the requester.
- // If the original holder has not renewed in the last SOFTLIMIT
- // period, then start lease recovery, otherwise fail.
- //
- if (lease.expiredSoftLimit()) {
- LOG.info("startFile: recover lease " + lease + ", src=" + src);
- boolean isClosed = internalReleaseLease(lease, src, null);
- if(!isClosed)
- throw new RecoveryInProgressException(
- "Failed to close file " + src +
- ". Lease recovery is in progress. Try again later.");
-
- } else {
- BlockInfoUnderConstruction lastBlock=pendingFile.getLastBlock();
- if(lastBlock != null && lastBlock.getBlockUCState() ==
- BlockUCState.UNDER_RECOVERY) {
- throw new RecoveryInProgressException(
- "Recovery in progress, file [" + src + "], " +
- "lease owner [" + lease.getHolder() + "]");
- } else {
- throw new AlreadyBeingCreatedException(
- "Failed to create file [" + src + "] for [" + holder +
- "] on client [" + clientMachine +
- "], because this file is already being created by [" +
- pendingFile.getClientName() + "] on [" +
- pendingFile.getClientMachine() + "]");
- }
- }
- }
+ recoverLeaseInternal(myFile, src, holder, clientMachine, false);
try {
blockManager.verifyReplication(src, replication, clientMachine);
@@ -1484,6 +1429,120 @@ public class FSNamesystem implements FSC
}
/**
+ * Recover lease;
+ * Immediately revoke the lease of the current lease holder and start lease
+ * recovery so that the file can be forced to be closed.
+ *
+ * @param src the path of the file to start lease recovery
+ * @param holder the lease holder's name
+ * @param clientMachine the client machine's name
+ * @return true if the file is already closed
+ * @throws IOException
+ */
+ synchronized boolean recoverLease(String src, String holder, String clientMachine)
+ throws IOException {
+ if (isInSafeMode()) {
+ throw new SafeModeException(
+ "Cannot recover the lease of " + src, safeMode);
+ }
+ if (!DFSUtil.isValidName(src)) {
+ throw new IOException("Invalid file name: " + src);
+ }
+
+ INode inode = dir.getFileINode(src);
+ if (inode == null) {
+ throw new FileNotFoundException("File not found " + src);
+ }
+
+ if (!inode.isUnderConstruction()) {
+ return true;
+ }
+ if (isPermissionEnabled) {
+ checkPathAccess(src, FsAction.WRITE);
+ }
+
+ recoverLeaseInternal(inode, src, holder, clientMachine, true);
+ return false;
+ }
+
+ private void recoverLeaseInternal(INode fileInode,
+ String src, String holder, String clientMachine, boolean force)
+ throws IOException {
+ if (fileInode != null && fileInode.isUnderConstruction()) {
+ INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction) fileInode;
+ //
+ // If the file is under construction, then it must be in our
+ // leases. First look up the lease (if any) held by the requester.
+ //
+ Lease lease = leaseManager.getLease(holder);
+ //
+ // We found the lease for this file. And surprisingly the original
+ // holder is trying to recreate this file. This should never occur.
+ //
+ if (!force && lease != null) {
+ Lease leaseFile = leaseManager.getLeaseByPath(src);
+ if ((leaseFile != null && leaseFile.equals(lease)) ||
+ lease.getHolder().equals(holder)) {
+ throw new AlreadyBeingCreatedException(
+ "failed to create file " + src + " for " + holder +
+ " on client " + clientMachine +
+ " because current leaseholder is trying to recreate file.");
+ }
+ }
+ //
+ // Find the original holder.
+ //
+ lease = leaseManager.getLease(pendingFile.getClientName());
+ if (lease == null) {
+ throw new AlreadyBeingCreatedException(
+ "failed to create file " + src + " for " + holder +
+ " on client " + clientMachine +
+ " because pendingCreates is non-null but no leases found.");
+ }
+ if (force) {
+ // close now: no need to wait for soft lease expiration and
+ // close only the file src
+ LOG.info("recoverLease: recover lease " + lease + ", src=" + src +
+ " from client " + pendingFile.getClientName());
+ internalReleaseLease(lease, src, holder);
+ } else {
+ assert lease.getHolder().equals(pendingFile.getClientName()) :
+ "Current lease holder " + lease.getHolder() +
+ " does not match file creator " + pendingFile.getClientName();
+ //
+ // If the original holder has not renewed in the last SOFTLIMIT
+ // period, then start lease recovery.
+ //
+ if (lease.expiredSoftLimit()) {
+ LOG.info("startFile: recover lease " + lease + ", src=" + src +
+ " from client " + pendingFile.getClientName());
+ boolean isClosed = internalReleaseLease(lease, src, null);
+ if(!isClosed)
+ throw new RecoveryInProgressException(
+ "Failed to close file " + src +
+ ". Lease recovery is in progress. Try again later.");
+ } else {
+ BlockInfoUnderConstruction lastBlock=pendingFile.getLastBlock();
+ if(lastBlock != null && lastBlock.getBlockUCState() ==
+ BlockUCState.UNDER_RECOVERY) {
+ throw new RecoveryInProgressException(
+ "Recovery in progress, file [" + src + "], " +
+ "lease owner [" + lease.getHolder() + "]");
+ } else {
+ throw new AlreadyBeingCreatedException(
+ "Failed to create file [" + src + "] for [" + holder +
+ "] on client [" + clientMachine +
+ "], because this file is already being created by [" +
+ pendingFile.getClientName() + "] on [" +
+ pendingFile.getClientMachine() + "]");
+ }
+ }
+ }
+ }
+
+ }
+
+ /**
* Append to an existing file in the namespace.
*/
LocatedBlock appendFile(String src, String holder, String clientMachine)
Modified: hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/hdfs/branches/branch-0.22/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Tue Jun 7 17:44:27 2011
@@ -786,6 +786,12 @@ public class NameNode implements Namenod
}
/** {@inheritDoc} */
+ public boolean recoverLease(String src, String clientName) throws IOException {
+ String clientMachine = getClientMachine();
+ return namesystem.recoverLease(src, clientName, clientMachine);
+ }
+
+ /** {@inheritDoc} */
public boolean setReplication(String src, short replication)
throws IOException {
return namesystem.setReplication(src, replication);
Modified: hadoop/hdfs/branches/branch-0.22/src/test/hdfs/org/apache/hadoop/hdfs/TestLeaseRecovery2.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/branch-0.22/src/test/hdfs/org/apache/hadoop/hdfs/TestLeaseRecovery2.java?rev=1133110&r1=1133109&r2=1133110&view=diff
==============================================================================
--- hadoop/hdfs/branches/branch-0.22/src/test/hdfs/org/apache/hadoop/hdfs/TestLeaseRecovery2.java (original)
+++ hadoop/hdfs/branches/branch-0.22/src/test/hdfs/org/apache/hadoop/hdfs/TestLeaseRecovery2.java Tue Jun 7 17:44:27 2011
@@ -17,7 +17,9 @@
*/
package org.apache.hadoop.hdfs;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.HashMap;
@@ -25,6 +27,7 @@ import java.util.Map;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -86,7 +89,147 @@ public class TestLeaseRecovery2 {
IOUtils.closeStream(dfs);
if (cluster != null) {cluster.shutdown();}
}
-
+
+ /**
+ * Test the NameNode's function for revoking the lease of the current lease holder.
+ * @throws Exception
+ */
+ @Test
+ public void testImmediateRecoveryOfLease() throws Exception {
+ //create a file
+ // write bytes into the file.
+ byte [] actual = new byte[FILE_SIZE];
+ int size = AppendTestUtil.nextInt(FILE_SIZE);
+ Path filepath = createFile("/immediateRecoverLease-shortlease", size, true);
+ // set the soft limit to be 1 second so that the
+ // namenode triggers lease recovery on next attempt to write-for-open.
+ cluster.setLeasePeriod(SHORT_LEASE_PERIOD, LONG_LEASE_PERIOD);
+
+ recoverLeaseUsingCreate(filepath);
+ verifyFile(dfs, filepath, actual, size);
+
+ //test recoverLease
+ // set the soft limit to be 1 hour but recoverLease should
+ // close the file immediately
+ cluster.setLeasePeriod(LONG_LEASE_PERIOD, LONG_LEASE_PERIOD);
+ size = AppendTestUtil.nextInt(FILE_SIZE);
+ filepath = createFile("/immediateRecoverLease-longlease", size, false);
+
+ // test recoverLease from a different client
+ recoverLease(filepath, null);
+ verifyFile(dfs, filepath, actual, size);
+
+ // test recoverLease from the same client
+ size = AppendTestUtil.nextInt(FILE_SIZE);
+ filepath = createFile("/immediateRecoverLease-sameclient", size, false);
+
+ // create another file using the same client
+ Path filepath1 = new Path(filepath.toString() + AppendTestUtil.nextInt());
+ FSDataOutputStream stm = dfs.create(filepath1, true, BUF_SIZE,
+ REPLICATION_NUM, BLOCK_SIZE);
+
+ // recover the first file
+ recoverLease(filepath, dfs);
+ verifyFile(dfs, filepath, actual, size);
+
+ // continue to write to the second file
+ stm.write(buffer, 0, size);
+ stm.close();
+ verifyFile(dfs, filepath1, actual, size);
+ }
+
+ private Path createFile(final String filestr, final int size,
+ final boolean triggerLeaseRenewerInterrupt)
+ throws IOException, InterruptedException {
+ AppendTestUtil.LOG.info("filestr=" + filestr);
+ Path filepath = new Path(filestr);
+ FSDataOutputStream stm = dfs.create(filepath, true, BUF_SIZE,
+ REPLICATION_NUM, BLOCK_SIZE);
+ assertTrue(dfs.dfs.exists(filestr));
+
+ AppendTestUtil.LOG.info("size=" + size);
+ stm.write(buffer, 0, size);
+
+ // hflush file
+ AppendTestUtil.LOG.info("hflush");
+ stm.hflush();
+
+ if (triggerLeaseRenewerInterrupt) {
+ AppendTestUtil.LOG.info("leasechecker.interruptAndJoin()");
+ dfs.dfs.leasechecker.interruptAndJoin();
+ }
+ return filepath;
+ }
+
+ private void recoverLease(Path filepath, DistributedFileSystem dfs)
+ throws Exception {
+ if (dfs == null) {
+ dfs = (DistributedFileSystem)getFSAsAnotherUser(conf);
+ }
+
+ while (!dfs.recoverLease(filepath)) {
+ AppendTestUtil.LOG.info("sleep " + 5000 + "ms");
+ Thread.sleep(5000);
+ }
+ }
+
+ private FileSystem getFSAsAnotherUser(final Configuration c)
+ throws IOException, InterruptedException {
+ return FileSystem.get(FileSystem.getDefaultUri(c), c,
+ UserGroupInformation.createUserForTesting(fakeUsername,
+ new String [] {fakeGroup}).getUserName());
+ }
+
+ private void recoverLeaseUsingCreate(Path filepath)
+ throws IOException, InterruptedException {
+ FileSystem dfs2 = getFSAsAnotherUser(conf);
+
+ boolean done = false;
+ for(int i = 0; i < 10 && !done; i++) {
+ AppendTestUtil.LOG.info("i=" + i);
+ try {
+ dfs2.create(filepath, false, BUF_SIZE, (short)1, BLOCK_SIZE);
+ fail("Creation of an existing file should never succeed.");
+ } catch (IOException ioe) {
+ final String message = ioe.getMessage();
+ if (message.contains("file exists")) {
+ AppendTestUtil.LOG.info("done", ioe);
+ done = true;
+ }
+ else if (message.contains(AlreadyBeingCreatedException.class.getSimpleName())) {
+ AppendTestUtil.LOG.info("GOOD! got " + message);
+ }
+ else {
+ AppendTestUtil.LOG.warn("UNEXPECTED IOException", ioe);
+ }
+ }
+
+ if (!done) {
+ AppendTestUtil.LOG.info("sleep " + 5000 + "ms");
+ try {Thread.sleep(5000);} catch (InterruptedException e) {}
+ }
+ }
+ assertTrue(done);
+ }
+
+ private void verifyFile(FileSystem dfs, Path filepath, byte[] actual,
+ int size) throws IOException {
+ AppendTestUtil.LOG.info("Lease for file " + filepath + " is recovered. "
+ + "Validating its contents now...");
+
+ // verify that file-size matches
+ assertTrue("File should be " + size + " bytes, but is actually " +
+ " found to be " + dfs.getFileStatus(filepath).getLen() +
+ " bytes",
+ dfs.getFileStatus(filepath).getLen() == size);
+
+ // verify that there is enough data to read.
+ System.out.println("File size is good. Now validating sizes from datanodes...");
+ FSDataInputStream stmin = dfs.open(filepath);
+ stmin.readFully(0, actual, 0, size);
+ stmin.close();
+ }
+
/**
* This test makes the client does not renew its lease and also
* set the hard lease expiration period to be short 1s. Thus triggering