You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by jl...@apache.org on 2013/04/19 07:13:50 UTC

svn commit: r1469686 - in /hadoop/common/branches/branch-0.23/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-serv...

Author: jlowe
Date: Fri Apr 19 05:13:49 2013
New Revision: 1469686

URL: http://svn.apache.org/r1469686
Log:
svn merge -c 1460808 FIXES: YARN-71. Fix the NodeManager to clean up local-dirs on restart. Contributed by Xuan Gong

Added:
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java
      - copied unchanged from r1460808, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java
Modified:
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
    hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java

Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt?rev=1469686&r1=1469685&r2=1469686&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/CHANGES.txt Fri Apr 19 05:13:49 2013
@@ -15,6 +15,9 @@ Release 0.23.8 - UNRELEASED
     YARN-476. ProcfsBasedProcessTree info message confuses users. 
     (sandyr via tucu)
 
+    YARN-71. Fix the NodeManager to clean up local-dirs on restart.
+    (Xuan Gong via sseth)
+
 Release 0.23.7 - 2013-04-18
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1469686&r1=1469685&r2=1469686&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Fri Apr 19 05:13:49 2013
@@ -58,6 +58,8 @@ import org.apache.hadoop.yarn.service.Co
 import org.apache.hadoop.yarn.service.Service;
 import org.apache.hadoop.yarn.util.Records;
 
+import com.google.common.annotations.VisibleForTesting;
+
 public class NodeManager extends CompositeService 
     implements EventHandler<NodeManagerEvent> {
 
@@ -113,6 +115,10 @@ public class NodeManager extends Composi
     return new WebServer(nmContext, resourceView, aclsManager, dirsHandler);
   }
 
+  protected DeletionService createDeletionService(ContainerExecutor exec) {
+    return new DeletionService(exec);
+  }
+
   protected void doSecureLogin() throws IOException {
     SecurityUtil.login(getConfig(), YarnConfiguration.NM_KEYTAB,
         YarnConfiguration.NM_PRINCIPAL);
@@ -143,7 +149,7 @@ public class NodeManager extends Composi
     } catch (IOException e) {
       throw new YarnException("Failed to initialize container executor", e);
     }    
-    DeletionService del = new DeletionService(exec);
+    DeletionService del = createDeletionService(exec);
     addService(del);
 
     // NodeManager level dispatcher
@@ -351,6 +357,11 @@ public class NodeManager extends Composi
     return containerManager;
   }
 
+  @VisibleForTesting
+  Context getNMContext() {
+    return this.context;
+  }
+
   public static void main(String[] args) {
     Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
     StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);

Modified: hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java?rev=1469686&r1=1469685&r2=1469686&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java Fri Apr 19 05:13:49 2013
@@ -22,6 +22,7 @@ import static org.apache.hadoop.fs.Creat
 
 import java.io.DataOutputStream;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.URISyntaxException;
@@ -53,8 +54,10 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.security.Credentials;
@@ -175,9 +178,11 @@ public class ResourceLocalizationService
     this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
 
     try {
-      // TODO queue deletions here, rather than NM init?
       FileContext lfs = getLocalFileContext(conf);
       lfs.setUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
+
+      cleanUpLocalDir(lfs,delService);
+
       List<String> localDirs = dirsHandler.getLocalDirs();
       for (String localDir : localDirs) {
         // $local/usercache
@@ -926,4 +931,76 @@ public class ResourceLocalizationService
 
   }
 
+  private void cleanUpLocalDir(FileContext lfs, DeletionService del) {
+    long currentTimeStamp = System.currentTimeMillis();
+    for (String localDir : dirsHandler.getLocalDirs()) {
+      renameLocalDir(lfs, localDir, ContainerLocalizer.USERCACHE,
+          currentTimeStamp);
+      renameLocalDir(lfs, localDir, ContainerLocalizer.FILECACHE,
+          currentTimeStamp);
+      renameLocalDir(lfs, localDir, ResourceLocalizationService.NM_PRIVATE_DIR,
+          currentTimeStamp);
+      try {
+        deleteLocalDir(lfs, del, localDir);
+      } catch (IOException e) {
+        // Do nothing, just give the warning
+        LOG.warn("Failed to delete localDir: " + localDir);
+      }
+    }
+  }
+
+  private void renameLocalDir(FileContext lfs, String localDir,
+      String localSubDir, long currentTimeStamp) {
+    try {
+      lfs.rename(new Path(localDir, localSubDir), new Path(
+          localDir, localSubDir + "_DEL_" + currentTimeStamp));
+    } catch (FileNotFoundException ex) {
+      // No need to handle this exception
+      // localSubDir may not be exist
+    } catch (Exception ex) {
+      // Do nothing, just give the warning
+      LOG.warn("Failed to rename the local file under " +
+          localDir + "/" + localSubDir);
+    }
+  }
+
+  private void deleteLocalDir(FileContext lfs, DeletionService del,
+      String localDir) throws IOException {
+    RemoteIterator<FileStatus> fileStatus = lfs.listStatus(new Path(localDir));
+    if (fileStatus != null) {
+      while (fileStatus.hasNext()) {
+        FileStatus status = fileStatus.next();
+        try {
+          if (status.getPath().getName().matches(".*" +
+              ContainerLocalizer.USERCACHE + "_DEL_.*")) {
+            cleanUpFilesFromSubDir(lfs, del, status.getPath());
+          } else if (status.getPath().getName()
+              .matches(".*" + NM_PRIVATE_DIR + "_DEL_.*")
+              ||
+              status.getPath().getName()
+                  .matches(".*" + ContainerLocalizer.FILECACHE + "_DEL_.*")) {
+            del.delete(null, status.getPath(), new Path[] {});
+          }
+        } catch (IOException ex) {
+          // Do nothing, just give the warning
+          LOG.warn("Failed to delete this local Directory: " +
+              status.getPath().getName());
+        }
+      }
+    }
+  }
+
+  private void cleanUpFilesFromSubDir(FileContext lfs, DeletionService del,
+      Path dirPath) throws IOException {
+    RemoteIterator<FileStatus> fileStatus = lfs.listStatus(dirPath);
+    if (fileStatus != null) {
+      while (fileStatus.hasNext()) {
+        FileStatus status = fileStatus.next();
+        String owner = status.getOwner();
+        del.delete(owner, status.getPath(), new Path[] {});
+      }
+    }
+    del.delete(null, dirPath, new Path[] {});
+  }
+
 }