You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2008/08/12 23:20:41 UTC

svn commit: r685320 - in /hadoop/core/trunk: CHANGES.txt src/core/org/apache/hadoop/fs/LocalDirAllocator.java src/test/org/apache/hadoop/fs/TestLocalDirAllocator.java

Author: omalley
Date: Tue Aug 12 14:20:41 2008
New Revision: 685320

URL: http://svn.apache.org/viewvc?rev=685320&view=rev
Log:
HADOOP-9. Use roulette scheduling for temporary space when the size
is not known. (Ari Rabkin via omalley)

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/LocalDirAllocator.java
    hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestLocalDirAllocator.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=685320&r1=685319&r2=685320&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue Aug 12 14:20:41 2008
@@ -157,8 +157,11 @@
     the end of the heartbeat rpc, rather than the start. This causes better
     behavior if the JobTracker is overloaded. (acmurthy via omalley)
 
-		HADOOP-3853. Move multiple input format (HADOOP-372) extension to 
-		library package. (tomwhite via johan)
+    HADOOP-3853. Move multiple input format (HADOOP-372) extension to 
+    library package. (tomwhite via johan)
+
+    HADOOP-9. Use roulette scheduling for temporary space when the size
+    is not known. (Ari Rabkin via omalley)
 
   OPTIMIZATIONS
 

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/LocalDirAllocator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/LocalDirAllocator.java?rev=685320&r1=685319&r2=685320&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/LocalDirAllocator.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/LocalDirAllocator.java Tue Aug 12 14:20:41 2008
@@ -267,18 +267,22 @@
     }
     
     /** Get a path from the local FS. This method should be used if the size of 
-     *  the file is not known apriori. We go round-robin over the set of disks
-     *  (via the configured dirs) and return the first complete path where
-     *  we could create the parent directory of the passed path. 
+     *  the file is not known a priori. 
+     *  
+     *  It will use roulette selection, picking directories
+     *  with probability proportional to their available space. 
      */
     public synchronized Path getLocalPathForWrite(String path, 
         Configuration conf) throws IOException {
       return getLocalPathForWrite(path, -1, conf);
     }
 
-    /** Get a path from the local FS. Pass size as -1 if not known apriori. We
+    /** Get a path from the local FS. If size is known, we go
      *  round-robin over the set of disks (via the configured dirs) and return
-     *  the first complete path which has enough space 
+     *  the first complete path which has enough space.
+     *  
+     *  If size is not known, use roulette selection -- pick directories
+     *  with probability proportional to their available space.
      */
     public synchronized Path getLocalPathForWrite(String pathStr, long size, 
         Configuration conf) throws IOException {
@@ -291,20 +295,38 @@
         pathStr = pathStr.substring(1);
       }
       Path returnPath = null;
-      while (numDirsSearched < numDirs && returnPath == null) {
-        if (size >= 0) {
+      
+      if(size == -1) {  //do roulette selection: pick dir with probability 
+                    //proportional to available size
+        long[] availableOnDisk = new long[dirDF.length];
+        long totalAvailable = 0;
+        
+            //build the "roulette wheel"
+        for(int i =0; i < dirDF.length; ++i) {
+          availableOnDisk[i] = dirDF[i].getAvailable();
+          totalAvailable += availableOnDisk[i];
+        }
+            // "roll the ball" -- pick a directory
+        Random r = new java.util.Random();
+        long randomPosition = Math.abs(r.nextLong()) % totalAvailable;
+        int dir=0;
+        while(randomPosition > availableOnDisk[dir]) {
+          randomPosition -= availableOnDisk[dir];
+          dir++;
+        }
+        dirNumLastAccessed = dir;
+        returnPath = createPath(pathStr);
+      } else {
+        while (numDirsSearched < numDirs && returnPath == null) {
           long capacity = dirDF[dirNumLastAccessed].getAvailable();
           if (capacity > size) {
             returnPath = createPath(pathStr);
           }
-        } else {
-          returnPath = createPath(pathStr);
-        }
-        dirNumLastAccessed++;
-        dirNumLastAccessed = dirNumLastAccessed % numDirs; 
-        numDirsSearched++;
-      } 
-
+          dirNumLastAccessed++;
+          dirNumLastAccessed = dirNumLastAccessed % numDirs; 
+          numDirsSearched++;
+        } 
+      }
       if (returnPath != null) {
         return returnPath;
       }

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestLocalDirAllocator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestLocalDirAllocator.java?rev=685320&r1=685319&r2=685320&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestLocalDirAllocator.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestLocalDirAllocator.java Tue Aug 12 14:20:41 2008
@@ -39,10 +39,12 @@
   final static private File BUFFER_ROOT = new File(BUFFER_DIR_ROOT);
   final static private String BUFFER_DIR[] = new String[] {
     BUFFER_DIR_ROOT+"/tmp0",  BUFFER_DIR_ROOT+"/tmp1", BUFFER_DIR_ROOT+"/tmp2",
-    BUFFER_DIR_ROOT+"/tmp3", BUFFER_DIR_ROOT+"/tmp4"};
+    BUFFER_DIR_ROOT+"/tmp3", BUFFER_DIR_ROOT+"/tmp4", BUFFER_DIR_ROOT+"/tmp5",
+    BUFFER_DIR_ROOT+"/tmp6"};
   final static private Path BUFFER_PATH[] = new Path[] {
     new Path(BUFFER_DIR[0]), new Path(BUFFER_DIR[1]), new Path(BUFFER_DIR[2]),
-    new Path(BUFFER_DIR[3]), new Path(BUFFER_DIR[4])};
+    new Path(BUFFER_DIR[3]), new Path(BUFFER_DIR[4]), new Path(BUFFER_DIR[5]),
+    new Path(BUFFER_DIR[6])};
   final static private String CONTEXT = "dfs.client.buffer.dir";
   final static private String FILENAME = "block";
   final static private LocalDirAllocator dirAllocator = 
@@ -50,7 +52,7 @@
   static LocalFileSystem localFs;
   final static private boolean isWindows =
     System.getProperty("os.name").startsWith("Windows");
-  
+  final static int SMALL_FILE_SIZE = 100;
   static {
     try {
       localFs = FileSystem.getLocal(conf);
@@ -68,7 +70,7 @@
   }
   
   private void validateTempDirCreation(int i) throws IOException {
-    File result = createTempFile();
+    File result = createTempFile(SMALL_FILE_SIZE);
     assertTrue("Checking for " + BUFFER_DIR[i] + " in " + result + " - FAILED!", 
         result.getPath().startsWith(new File(BUFFER_DIR[i], FILENAME).getPath()));
   }
@@ -78,6 +80,13 @@
     result.delete();
     return result;
   }
+  
+  private File createTempFile(long size) throws IOException {
+    File result = dirAllocator.createTmpFileForWrite(FILENAME, size, conf);
+    result.delete();
+    return result;
+  }
+  
   /** Two buffer dirs. The first dir does not exist & is on a read-only disk; 
    * The second dir exists & is RW
    * @throws Exception
@@ -122,7 +131,7 @@
       conf.set(CONTEXT, BUFFER_DIR[2]+","+BUFFER_DIR[3]);
 
       // create the first file, and then figure the round-robin sequence
-      createTempFile();
+      createTempFile(SMALL_FILE_SIZE);
       int firstDirIdx = (dirAllocator.getCurrentDirectoryIndex() == 0) ? 2 : 3;
       int secondDirIdx = (firstDirIdx == 2) ? 3 : 2;
       
@@ -146,8 +155,8 @@
       assertTrue(localFs.mkdirs(BUFFER_PATH[3]));
       assertTrue(localFs.mkdirs(BUFFER_PATH[4]));
       
-      // create the first file, and then figure the round-robin sequence
-      createTempFile();
+      // create the first file with size, and then figure the round-robin sequence
+      createTempFile(SMALL_FILE_SIZE);
 
       int nextDirIdx = (dirAllocator.getCurrentDirectoryIndex() == 0) ? 3 : 4;
       validateTempDirCreation(nextDirIdx);
@@ -160,4 +169,43 @@
       rmBufferDirs();
     }
   }
+  
+  /**
+   * Two buffer dirs, on read-write disk.
+   * 
+   * Try to create a whole bunch of files.
+   *  Verify that they do indeed all get created where they should.
+   *  
+   *  Would ideally check statistical properties of distribution, but
+   *  we don't have the nerve to risk false-positives here.
+   * 
+   * @throws Exception
+   */
+  static final int TRIALS = 100;
+  public void test4() throws Exception {
+    if (isWindows) return;
+    try {
+
+      conf.set(CONTEXT, BUFFER_DIR[5]+","+BUFFER_DIR[6]);
+      assertTrue(localFs.mkdirs(BUFFER_PATH[5]));
+      assertTrue(localFs.mkdirs(BUFFER_PATH[6]));
+        
+      int inDir5=0, inDir6=0;
+      for(int i = 0; i < TRIALS; ++i) {
+        File result = createTempFile();
+        if(result.getPath().startsWith(new File(BUFFER_DIR[5], FILENAME).getPath())) {
+          inDir5++;
+        } else  if(result.getPath().startsWith(new File(BUFFER_DIR[6], FILENAME).getPath())) {
+          inDir6++;
+        }
+        result.delete();
+      }
+      
+      assertTrue( inDir5 + inDir6 == TRIALS);
+        
+    } finally {
+      rmBufferDirs();
+    }
+  }
+  
 }