You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by tu...@apache.org on 2013/01/10 01:52:16 UTC

svn commit: r1431167 - in /hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/o...

Author: tucu
Date: Thu Jan 10 00:52:15 2013
New Revision: 1431167

URL: http://svn.apache.org/viewvc?rev=1431167&view=rev
Log:
MAPREDUCE-4907. TrackerDistributedCacheManager issues too many getFileStatus calls. (sandyr via tucu)

Modified:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1431167&r1=1431166&r2=1431167&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Thu Jan 10 00:52:15 2013
@@ -39,6 +39,9 @@ Release 2.0.3-alpha - Unreleased
     MAPREDUCE-4920. Use security token protobuf definition from hadoop common.
     (Suresh Srinivas via vinodkv)
 
+    MAPREDUCE-4907. TrackerDistributedCacheManager issues too many getFileStatus
+    calls. (sandyr via tucu)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java?rev=1431167&r1=1431166&r2=1431167&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java Thu Jan 10 00:52:15 2013
@@ -251,9 +251,8 @@ class JobSubmitter {
     }
 
     //  set the timestamps of the archives and files
-    ClientDistributedCacheManager.determineTimestamps(conf);
     //  set the public/private visibility of the archives and files
-    ClientDistributedCacheManager.determineCacheVisibilities(conf);
+    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(conf);
     // get DelegationToken for each cached file
     ClientDistributedCacheManager.getDelegationTokens(conf, job
         .getCredentials());

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java?rev=1431167&r1=1431166&r2=1431167&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java Thu Jan 10 00:52:15 2013
@@ -19,6 +19,8 @@ package org.apache.hadoop.mapreduce.file
 
 import java.io.IOException;
 import java.net.URI;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -39,6 +41,25 @@ public class ClientDistributedCacheManag
 
   /**
    * Determines timestamps of files to be cached, and stores those
+   * in the configuration. Determines the visibilities of the distributed cache
+   * files and archives. The visibility of a cache path is "public" if the leaf
+   * component has READ permissions for others, and the parent subdirs have 
+   * EXECUTE permissions for others.
+   * 
+   * This is an internal method!
+   * 
+   * @param job
+   * @throws IOException
+   */
+  public static void determineTimestampsAndCacheVisibilities(Configuration job)
+  throws IOException {
+    Map<URI, FileStatus> statCache = new HashMap<URI, FileStatus>();
+    determineTimestamps(job, statCache);
+    determineCacheVisibilities(job, statCache);
+  }
+  
+  /**
+   * Determines timestamps of files to be cached, and stores those
    * in the configuration.  This is intended to be used internally by JobClient
    * after all cache files have been added.
    * 
@@ -47,16 +68,17 @@ public class ClientDistributedCacheManag
    * @param job Configuration of a job.
    * @throws IOException
    */
-  public static void determineTimestamps(Configuration job) throws IOException {
+  public static void determineTimestamps(Configuration job,
+      Map<URI, FileStatus> statCache) throws IOException {
     URI[] tarchives = DistributedCache.getCacheArchives(job);
     if (tarchives != null) {
-      FileStatus status = getFileStatus(job, tarchives[0]);
+      FileStatus status = getFileStatus(job, tarchives[0], statCache);
       StringBuilder archiveFileSizes =
         new StringBuilder(String.valueOf(status.getLen()));
       StringBuilder archiveTimestamps =
         new StringBuilder(String.valueOf(status.getModificationTime()));
       for (int i = 1; i < tarchives.length; i++) {
-        status = getFileStatus(job, tarchives[i]);
+        status = getFileStatus(job, tarchives[i], statCache);
         archiveFileSizes.append(",");
         archiveFileSizes.append(String.valueOf(status.getLen()));
         archiveTimestamps.append(",");
@@ -68,13 +90,13 @@ public class ClientDistributedCacheManag
   
     URI[] tfiles = DistributedCache.getCacheFiles(job);
     if (tfiles != null) {
-      FileStatus status = getFileStatus(job, tfiles[0]);
+      FileStatus status = getFileStatus(job, tfiles[0], statCache);
       StringBuilder fileSizes =
         new StringBuilder(String.valueOf(status.getLen()));
       StringBuilder fileTimestamps = new StringBuilder(String.valueOf(
         status.getModificationTime()));
       for (int i = 1; i < tfiles.length; i++) {
-        status = getFileStatus(job, tfiles[i]);
+        status = getFileStatus(job, tfiles[i], statCache);
         fileSizes.append(",");
         fileSizes.append(String.valueOf(status.getLen()));
         fileTimestamps.append(",");
@@ -123,25 +145,25 @@ public class ClientDistributedCacheManag
    * @param job
    * @throws IOException
    */
-  public static void determineCacheVisibilities(Configuration job) 
-  throws IOException {
+  public static void determineCacheVisibilities(Configuration job,
+      Map<URI, FileStatus> statCache) throws IOException {
     URI[] tarchives = DistributedCache.getCacheArchives(job);
     if (tarchives != null) {
       StringBuilder archiveVisibilities =
-        new StringBuilder(String.valueOf(isPublic(job, tarchives[0])));
+        new StringBuilder(String.valueOf(isPublic(job, tarchives[0], statCache)));
       for (int i = 1; i < tarchives.length; i++) {
         archiveVisibilities.append(",");
-        archiveVisibilities.append(String.valueOf(isPublic(job, tarchives[i])));
+        archiveVisibilities.append(String.valueOf(isPublic(job, tarchives[i], statCache)));
       }
       setArchiveVisibilities(job, archiveVisibilities.toString());
     }
     URI[] tfiles = DistributedCache.getCacheFiles(job);
     if (tfiles != null) {
       StringBuilder fileVisibilities =
-        new StringBuilder(String.valueOf(isPublic(job, tfiles[0])));
+        new StringBuilder(String.valueOf(isPublic(job, tfiles[0], statCache)));
       for (int i = 1; i < tfiles.length; i++) {
         fileVisibilities.append(",");
-        fileVisibilities.append(String.valueOf(isPublic(job, tfiles[i])));
+        fileVisibilities.append(String.valueOf(isPublic(job, tfiles[i], statCache)));
       }
       setFileVisibilities(job, fileVisibilities.toString());
     }
@@ -193,19 +215,13 @@ public class ClientDistributedCacheManag
   }
 
   /**
-   * Returns {@link FileStatus} of a given cache file on hdfs.
-   * 
-   * @param conf configuration
-   * @param cache cache file 
-   * @return {@link FileStatus} of a given cache file on hdfs
-   * @throws IOException
+   * Gets the file status for the given URI.  If the URI is in the cache,
+   * returns it.  Otherwise, fetches it and adds it to the cache.
    */
-  static FileStatus getFileStatus(Configuration conf, URI cache)
-    throws IOException {
-    FileSystem fileSystem = FileSystem.get(cache, conf);
-    Path filePath = new Path(cache.getPath());
-
-    return fileSystem.getFileStatus(filePath);
+  private static FileStatus getFileStatus(Configuration job, URI uri,
+      Map<URI, FileStatus> statCache) throws IOException {
+    FileSystem fileSystem = FileSystem.get(uri, job);
+    return getFileStatus(fileSystem, uri, statCache);
   }
 
   /**
@@ -216,14 +232,15 @@ public class ClientDistributedCacheManag
    * @return true if the path in the uri is visible to all, false otherwise
    * @throws IOException
    */
-  static boolean isPublic(Configuration conf, URI uri) throws IOException {
+  static boolean isPublic(Configuration conf, URI uri,
+      Map<URI, FileStatus> statCache) throws IOException {
     FileSystem fs = FileSystem.get(uri, conf);
     Path current = new Path(uri.getPath());
     //the leaf level file should be readable by others
-    if (!checkPermissionOfOther(fs, current, FsAction.READ)) {
+    if (!checkPermissionOfOther(fs, current, FsAction.READ, statCache)) {
       return false;
     }
-    return ancestorsHaveExecutePermissions(fs, current.getParent());
+    return ancestorsHaveExecutePermissions(fs, current.getParent(), statCache);
   }
 
   /**
@@ -231,12 +248,12 @@ public class ClientDistributedCacheManag
    * permission set for all users (i.e. that other users can traverse
    * the directory heirarchy to the given path)
    */
-  static boolean ancestorsHaveExecutePermissions(FileSystem fs, Path path)
-    throws IOException {
+  static boolean ancestorsHaveExecutePermissions(FileSystem fs, Path path,
+      Map<URI, FileStatus> statCache) throws IOException {
     Path current = path;
     while (current != null) {
       //the subdirs in the path should have execute permissions for others
-      if (!checkPermissionOfOther(fs, current, FsAction.EXECUTE)) {
+      if (!checkPermissionOfOther(fs, current, FsAction.EXECUTE, statCache)) {
         return false;
       }
       current = current.getParent();
@@ -254,8 +271,8 @@ public class ClientDistributedCacheManag
    * @throws IOException
    */
   private static boolean checkPermissionOfOther(FileSystem fs, Path path,
-      FsAction action) throws IOException {
-    FileStatus status = fs.getFileStatus(path);
+      FsAction action, Map<URI, FileStatus> statCache) throws IOException {
+    FileStatus status = getFileStatus(fs, path.toUri(), statCache);
     FsPermission perms = status.getPermission();
     FsAction otherAction = perms.getOtherAction();
     if (otherAction.implies(action)) {
@@ -263,4 +280,14 @@ public class ClientDistributedCacheManag
     }
     return false;
   }
+
+  private static FileStatus getFileStatus(FileSystem fs, URI uri,
+      Map<URI, FileStatus> statCache) throws IOException {
+    FileStatus stat = statCache.get(uri);
+    if (stat == null) {
+      stat = fs.getFileStatus(new Path(uri));
+      statCache.put(uri, stat);
+    }
+    return stat;
+  }
 }