You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/10/26 21:55:36 UTC

svn commit: r468107 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/DFSClient.java src/java/org/apache/hadoop/dfs/FSConstants.java src/java/org/apache/hadoop/dfs/FSNamesystem.java

Author: cutting
Date: Thu Oct 26 12:55:35 2006
New Revision: 468107

URL: http://svn.apache.org/viewvc?view=rev&rev=468107
Log:
HADOOP-563. Improve NameNode lease policy.  Contributed by Dhruba.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=468107&r1=468106&r2=468107
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Thu Oct 26 12:55:35 2006
@@ -50,6 +50,14 @@
 14. HADOOP-627.  Fix some synchronization problems in MiniMRCluster
     that sometimes caused unit tests to fail.  (Nigel Daley via cutting)
 
+15. HADOOP-563.  Improve the NameNode's lease policy so that leases
+    are held for one hour without renewal (instead of one minute).
+    However, another attempt to create the same file will still succeed
+    if the lease has not been renewed within a minute.  This prevents
+    communication or scheduling problems from causing a write to fail
+    for up to an hour, barring some other process trying to create the
+    same file.  (Dhruba Borthakur via cutting)
+
 
 Release 0.7.2 - 2006-10-18
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?view=diff&rev=468107&r1=468106&r2=468107
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Thu Oct 26 12:55:35 2006
@@ -445,7 +445,7 @@
         public void run() {
             long lastRenewed = 0;
             while (running) {
-                if (System.currentTimeMillis() - lastRenewed > (LEASE_PERIOD / 2)) {
+                if (System.currentTimeMillis() - lastRenewed > (LEASE_SOFTLIMIT_PERIOD / 2)) {
                     try {
                       if( pendingCreates.size() > 0 )
                         namenode.renewLease(clientName);
@@ -1004,7 +1004,7 @@
                   // wait and try again.
                   LOG.info(StringUtils.stringifyException(e));
                   try {
-                    Thread.sleep(LEASE_PERIOD);
+                    Thread.sleep(LEASE_SOFTLIMIT_PERIOD);
                   } catch (InterruptedException ie) {
                   }
                 }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java?view=diff&rev=468107&r1=468106&r2=468107
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java Thu Oct 26 12:55:35 2006
@@ -103,7 +103,8 @@
     public static long HEARTBEAT_INTERVAL = 3;
     public static long EXPIRE_INTERVAL = 10 * 60 * 1000;
     public static long BLOCKREPORT_INTERVAL = 60 * 60 * 1000;
-    public static long LEASE_PERIOD = 60 * 1000;
+    public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
+    public static final long LEASE_HARDLIMIT_PERIOD = 60 * LEASE_SOFTLIMIT_PERIOD;
     public static int READ_TIMEOUT = 60 * 1000;
 
     // We need to limit the length and depth of a path in the filesystem.  HADOOP-438

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=468107&r1=468106&r2=468107
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Thu Oct 26 12:55:35 2006
@@ -414,11 +414,54 @@
         throw new IOException("Invalid file name: " + src);      	  
       }
       try {
-        if (pendingCreates.get(src) != null) {
-           throw new AlreadyBeingCreatedException(
-                   "failed to create file " + src + " for " + holder +
-                   " on client " + clientMachine + 
-                   " because pendingCreates is non-null.");
+        FileUnderConstruction pendingFile = (FileUnderConstruction ) 
+                                              pendingCreates.get(src);
+        if (pendingFile != null) {
+          //
+          // If the file exists in pendingCreate, then it must be in our
+          // leases. Find the appropriate lease record.
+          //
+          Lease lease = (Lease) leases.get(holder);
+          //
+          // A lease already exists for the requesting holder (the lookup
+          // above is keyed by holder, not by file). This is treated as the
+          // current leaseholder trying to recreate the file, which should
+          // never occur.
+          //
+          if (lease != null) {
+            throw new AlreadyBeingCreatedException(
+                  "failed to create file " + src + " for " + holder +
+                  " on client " + clientMachine + 
+                  " because current leaseholder is trying to recreate file.");
+          }
+          //
+          // Find the original holder.
+          //
+          UTF8 oldholder = pendingFile.getClientName();
+          lease = (Lease) leases.get(oldholder);
+          if (lease == null) {
+            throw new AlreadyBeingCreatedException(
+                  "failed to create file " + src + " for " + holder +
+                  " on client " + clientMachine + 
+                  " because pendingCreates is non-null but no leases found.");
+          }
+          //
+          // If the original holder has not renewed in the last SOFTLIMIT 
+          // period, then reclaim all resources and allow this request 
+          // to proceed. Otherwise, prevent this request from creating file.
+          //
+          if (lease.expiredSoftLimit()) {
+            lease.releaseLocks();
+            leases.remove(lease.holder);
+            LOG.info("Removing lease " + lease + " ");
+            if (!sortedLeases.remove(lease)) {
+              LOG.error("Unknown failure trying to remove " + lease + 
+                       " from lease set.");
+            }
+          } else  {
+            throw new AlreadyBeingCreatedException(
+                  "failed to create file " + src + " for " + holder +
+                  " on client " + clientMachine + 
+                  " because pendingCreates is non-null.");
+          }
         }
 
         try {
@@ -929,12 +972,23 @@
         public void renew() {
             this.lastUpdate = now();
         }
-        public boolean expired() {
-            if (now() - lastUpdate > LEASE_PERIOD) {
+        /**
+         * Returns true if the Hard Limit Timer has expired
+         */
+        public boolean expiredHardLimit() {
+            if (now() - lastUpdate > LEASE_HARDLIMIT_PERIOD) {
                 return true;
-            } else {
-                return false;
             }
+            return false;
+        }
+        /**
+         * Returns true if the Soft Limit Timer has expired
+         */
+        public boolean expiredSoftLimit() {
+            if (now() - lastUpdate > LEASE_SOFTLIMIT_PERIOD) {
+                return true;
+            }
+            return false;
         }
         public void obtained(UTF8 src) {
             locks.add(src);
@@ -999,7 +1053,7 @@
                         Lease top;
                         while ((sortedLeases.size() > 0) &&
                                ((top = (Lease) sortedLeases.first()) != null)) {
-                            if (top.expired()) {
+                            if (top.expiredHardLimit()) {
                                 top.releaseLocks();
                                 leases.remove(top.holder);
                                 LOG.info("Removing lease " + top + ", leases remaining: " + sortedLeases.size());