You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sz...@apache.org on 2008/09/11 22:23:18 UTC

svn commit: r694458 [2/2] - in /hadoop/core/trunk: ./ docs/ src/docs/src/documentation/content/xdocs/ src/test/org/apache/hadoop/fs/loadGenerator/

Added: hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java?rev=694458&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java Thu Sep 11 13:23:09 2008
@@ -0,0 +1,466 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/** The load generator is a tool for testing NameNode behavior under
+ * different client loads.
+ * It allows the user to generate different mixes of read, write,
+ * and list requests by specifying the probabilities of read and
+ * write. The user controls the intensity of the load by
+ * adjusting parameters for the number of worker threads and the delay
+ * between operations. While load generators are running, the user
+ * can profile and monitor the running of the NameNode. When a load
+ * generator exits, it prints some NameNode statistics like the average
+ * execution time of each kind of operations and the NameNode
+ * throughput.
+ * 
+ * After command line argument parsing and data initialization,
+ * the load generator spawns the number of worker threads 
+ * as specified by the user.
+ * Each thread sends a stream of requests to the NameNode.
+ * For each iteration, it first decides if it is going to read a file,
+ * create a file, or list a directory following the read and write 
+ * probabilities specified by the user.
+ * When reading, it randomly picks a file in the test space and reads
+ * the entire file. When writing, it randomly picks a directory in the
+ * test space and creates a file whose name consists of the current 
+ * machine's host name and the thread id. The length of the file
+ * follows Gaussian distribution with an average size of 2 blocks and
+ * the standard deviation of 1 block. The new file is filled with 'a'.
+ * Immediately after the file creation completes, the file is deleted
+ * from the test space.
+ * While listing, it randomly picks a directory in the test space and
+ * lists the directory content.
+ * Between two consecutive operations, the thread pauses for a random
+ * amount of time in the range of [0, maxDelayBetweenOps] 
+ * if the specified max delay is not zero.
+ * All threads are stopped when the specified elapsed time is passed.
+ * Before exiting, the program prints the average execution for 
+ * each kind of NameNode operations, and the number of requests
+ * served by the NameNode.
+ *
+ * The synopsis of the command is
+ * java LoadGenerator
+ *   -readProbability <read probability>: read probability [0, 1]
+ *                                        with a default value of 0.3333. 
+ *   -writeProbability <write probability>: write probability [0, 1]
+ *                                         with a default value of 0.3333.
+ *   -root <root>: test space with a default value of /testLoadSpace
+ *   -maxDelayBetweenOps <maxDelayBetweenOpsInMillis>: 
+ *      Max delay in the unit of milliseconds between two operations with a 
+ *      default value of 0 indicating no delay.
+ *   -numOfThreads <numOfThreads>: 
+ *      number of threads to spawn with a default value of 200.
+ *   -elapsedTime <elapsedTimeInSecs>: 
+ *      the elapsed time of program with a default value of 0 
+ *      indicating running forever
+ *   -startTime <startTimeInMillis> : when the threads start to run.
+ */
+public class LoadGenerator extends Configured implements Tool {
+  // Cleared by run() once the elapsed time has passed so that the worker
+  // threads leave their main loop.
+  private volatile boolean shouldRun = true;
+  // Root of the test space; overridden by -root.
+  private Path root = DataGenerator.DEFAULT_ROOT;
+  // File system obtained from the tool's configuration in init().
+  private FileSystem fs;
+  // Upper bound, in milliseconds, of the pause between two operations;
+  // 0 disables the pause.
+  private int maxDelayBetweenOps = 0;
+  private int numOfThreads = 200;
+  private double readPr = 0.3333;
+  private double writePr = 0.3333;
+  // Run duration in seconds; with 0 the worker threads are never told to stop.
+  private long elapsedTime = 0;
+  // Wall-clock time in milliseconds at which the workers are started;
+  // defaults to 10 seconds after this object is constructed.
+  private long startTime = System.currentTimeMillis()+10000;
+  // Multiplier applied to the Gaussian sample when sizing created files.
+  final static private int BLOCK_SIZE = 10;
+  private ArrayList<String> files = new ArrayList<String>();  // a table of file names
+  private ArrayList<String> dirs = new ArrayList<String>(); // a table of directory names
+  // Random source shared by all worker threads; created in init().
+  private Random r = null;
+  /** Usage text printed when the command line cannot be parsed.
+   * Lists every option that init() accepts, including -seed.
+   */
+  final private static String USAGE = "java LoadGenerator\n" +
+    "-readProbability <read probability>\n" +
+    "-writeProbability <write probability>\n" +
+    "-root <root>\n" +
+    "-maxDelayBetweenOps <maxDelayBetweenOpsInMillis>\n" +
+    "-numOfThreads <numOfThreads>\n" +
+    "-elapsedTime <elapsedTimeInSecs>\n" +
+    "-startTime <startTimeInMillis>\n" +
+    "-seed <seed>";
+  final private String hostname;
+
+  /** Creates a load generator and records the local machine's host name,
+   * which becomes part of the name of every file this instance creates.
+   *
+   * @throws UnknownHostException if the local host name cannot be resolved
+   */
+  public LoadGenerator() throws IOException, UnknownHostException {
+    hostname = InetAddress.getLocalHost().getHostName();
+  }
+
+  // Indexes into the per-operation statistics arrays.
+  private final static int OPEN = 0;
+  private final static int LIST = 1;
+  private final static int CREATE = 2;
+  private final static int WRITE_CLOSE = 3;
+  private final static int DELETE = 4;
+  private final static int TOTAL_OP_TYPES =5;
+  // Accumulated execution time in milliseconds and number of operations,
+  // indexed by operation type. run() adds every worker thread's totals
+  // into these arrays; genFile() updates the CREATE and WRITE_CLOSE slots.
+  private long [] executionTime = new long[TOTAL_OP_TYPES];
+  private long [] totalNumOfOps = new long[TOTAL_OP_TYPES];
+  
+  /** A worker thread that sends a stream of requests to the NameNode.
+   * At each iteration, it first decides whether it is going to read a file,
+   * create a file, or list a directory, following the read
+   * and write probabilities.
+   * When reading, it randomly picks a file in the test space and reads
+   * the entire file. When writing, it randomly picks a directory in the
+   * test space and creates a file whose name consists of the current 
+   * machine's host name and the thread id. The length of the file
+   * follows Gaussian distribution with an average size of 2 blocks and
+   * the standard deviation of 1 block. The new file is filled with 'a'.
+   * Immediately after the file creation completes, the file is deleted
+   * from the test space.
+   * While listing, it randomly picks a directory in the test space and
+   * lists the directory content.
+   * Between two consecutive operations, the thread pauses for a random
+   * amount of time in the range of [0, maxDelayBetweenOps) 
+   * if the specified max delay is not zero.
+   * The thread keeps running until the enclosing generator clears
+   * <code>shouldRun</code> or an operation fails.
+   */
+  private class DFSClientThread extends Thread {
+    private int id;
+    // Per-thread OPEN, LIST and DELETE statistics; the enclosing run()
+    // sums them after joining the thread.
+    private long [] executionTime = new long[TOTAL_OP_TYPES];
+    private long [] totalNumOfOps = new long[TOTAL_OP_TYPES];
+    // Scratch buffer used to drain files in read().
+    private byte[] buffer = new byte[1024];
+
+    private DFSClientThread(int id) {
+      this.id = id;
+    }
+
+    /** Main loop.
+     * Each iteration performs the next operation and then pauses.
+     * Any exception ends the thread after being reported on stderr.
+     */
+    public void run() {
+      try {
+        while (shouldRun) {
+          nextOp();
+          delay();
+        }
+      } catch (Exception ioe) {
+        System.err.println(ioe.getLocalizedMessage());
+        ioe.printStackTrace();
+      }
+    }
+
+    /** Let the thread pause for a random amount of time in the range of
+     * [0, maxDelayBetweenOps) if the delay is not zero. Otherwise, no pause.
+     */
+    private void delay() throws InterruptedException {
+      if (maxDelayBetweenOps>0) {
+        int delay = r.nextInt(maxDelayBetweenOps);
+        Thread.sleep(delay);
+      }
+    }
+
+    /** Perform the next operation. 
+     * 
+     * Depending on the read and write probabilities, the next
+     * operation could be either read, write, or list.
+     */
+    private void nextOp() throws IOException {
+      double rn = r.nextDouble();
+      if (rn < readPr) {
+        read();
+      } else if (rn < readPr+writePr) {
+        write();
+      } else {
+        list();
+      }
+    }
+
+    /** Read operation randomly picks a file in the test space and reads
+     * the entire file. Only the open call is timed. */
+    private void read() throws IOException {
+      String fileName = files.get(r.nextInt(files.size()));
+      long startTime = System.currentTimeMillis();
+      InputStream in = fs.open(new Path(fileName));
+      executionTime[OPEN] += (System.currentTimeMillis()-startTime);
+      totalNumOfOps[OPEN]++;
+      try {
+        while (in.read(buffer) != -1) {}
+      } finally {
+        // Release the stream even when a read fails part-way through.
+        in.close();
+      }
+    }
+
+    /** The write operation randomly picks a directory in the
+     * test space and creates a file whose name consists of the current 
+     * machine's host name and the thread id. The length of the file
+     * follows Gaussian distribution with an average size of 2 blocks and
+     * the standard deviation of 1 block. The new file is filled with 'a'.
+     * Immediately after the file creation completes, the file is deleted
+     * from the test space. The creation is delegated to genFile() of the
+     * enclosing class; only the delete is timed here.
+     */
+    private void write() throws IOException {
+      String dirName = dirs.get(r.nextInt(dirs.size()));
+      Path file = new Path(dirName, hostname+id);
+      double fileSize = 0;
+      // Re-draw until the sampled size is strictly positive.
+      while ((fileSize = r.nextGaussian()+2)<=0) {}
+      genFile(file, (long)(fileSize*BLOCK_SIZE));
+      long startTime = System.currentTimeMillis();
+      fs.delete(file, true);
+      executionTime[DELETE] += (System.currentTimeMillis()-startTime);
+      totalNumOfOps[DELETE]++;
+    }
+
+    /** The list operation randomly picks a directory in the test space and
+     * lists the directory content.
+     */
+    private void list() throws IOException {
+      String dirName = dirs.get(r.nextInt(dirs.size()));
+      long startTime = System.currentTimeMillis();
+      fs.listStatus(new Path(dirName));
+      executionTime[LIST] += (System.currentTimeMillis()-startTime);
+      totalNumOfOps[LIST]++;
+    }
+  }
+  
+  /** Main function:
+   * It first initializes data by parsing the command line arguments.
+   * It then waits for the start time and starts the number of
+   * DFSClient threads as specified by the user.
+   * It stops all the threads when the specified elapsed time is passed;
+   * with an elapsed time of zero the threads are never told to stop.
+   * Before exiting, it prints the average execution time of
+   * each operation and the operation throughput.
+   *
+   * @param args command line arguments
+   * @return 0 on success, or the non-zero code returned by init()
+   */
+  public int run(String[] args) throws Exception {
+    int exitCode = init(args);
+    if (exitCode != 0) {
+      return exitCode;
+    }
+
+    barrier();
+
+    DFSClientThread[] threads = new DFSClientThread[numOfThreads];
+    for (int i=0; i<numOfThreads; i++) {
+      threads[i] = new DFSClientThread(i); 
+      threads[i].start();
+    }
+    if (elapsedTime>0) {
+      Thread.sleep(elapsedTime*1000);
+      shouldRun = false;
+    } 
+    // Fold each worker's private statistics into the global totals.
+    for (DFSClientThread thread : threads) {
+      thread.join();
+      for (int i=0; i<TOTAL_OP_TYPES; i++) {
+        executionTime[i] += thread.executionTime[i];
+        totalNumOfOps[i] += thread.totalNumOfOps[i];
+      }
+    }
+    long totalOps = 0;
+    for (int i=0; i<TOTAL_OP_TYPES; i++) {
+      totalOps += totalNumOfOps[i];
+    }
+
+    // Every average is guarded by its own counter so that none is
+    // dropped or computed as 0/0 when the counters disagree.
+    printAverage("open", OPEN);
+    printAverage("list", LIST);
+    printAverage("deletion", DELETE);
+    printAverage("create", CREATE);
+    printAverage("write_close", WRITE_CLOSE);
+    if (elapsedTime != 0) { 
+      System.out.println("Average operations per second: " + 
+          (double)totalOps/elapsedTime +"ops/s");
+    }
+    System.out.println();
+    return exitCode;
+  }
+
+  /** Print the average execution time of one operation type,
+   * provided at least one such operation was performed.
+   */
+  private void printAverage(String opName, int op) {
+    if (totalNumOfOps[op] != 0) {
+      System.out.println("Average " + opName + " execution time: " + 
+          (double)executionTime[op]/totalNumOfOps[op] + "ms");
+    }
+  }
+
+  /** Parse the command line arguments and initialize the data.
+   *
+   * @param args command line arguments
+   * @return 0 on success; -1 if the file system cannot be obtained, an
+   *         argument is unknown, malformed, out of range or lacks its
+   *         value, or the test space holds no usable files or directories
+   */
+  private int init(String[] args) throws IOException {
+    try {
+      fs = FileSystem.get(getConf());
+    } catch (IOException ioe) {
+      System.err.println("Can not initialize the file system: " + 
+          ioe.getLocalizedMessage());
+      return -1;
+    }
+    // Mixed into the seed so that instances on different hosts differ.
+    int hostHashCode = hostname.hashCode();
+    try {
+      for (int i = 0; i < args.length; i++) { // parse command line
+        if (args[i].equals("-readProbability")) {
+          readPr = Double.parseDouble(args[++i]);
+          if (readPr<0 || readPr>1) {
+            System.err.println( 
+                "The read probability must be [0, 1]: " + readPr);
+            return -1;
+          }
+        } else if (args[i].equals("-writeProbability")) {
+          writePr = Double.parseDouble(args[++i]);
+          if (writePr<0 || writePr>1) {
+            System.err.println( 
+                "The write probability must be [0, 1]: " + writePr);
+            return -1;
+          }
+        } else if (args[i].equals("-root")) {
+          root = new Path(args[++i]);
+        } else if (args[i].equals("-maxDelayBetweenOps")) {
+          maxDelayBetweenOps = Integer.parseInt(args[++i]); // in milliseconds
+        } else if (args[i].equals("-numOfThreads")) {
+          numOfThreads = Integer.parseInt(args[++i]);
+          if (numOfThreads <= 0) {
+            System.err.println(
+                "Number of threads must be positive: " + numOfThreads);
+            return -1;
+          }
+        } else if (args[i].equals("-startTime")) {
+          startTime = Long.parseLong(args[++i]);
+        } else if (args[i].equals("-elapsedTime")) {
+          elapsedTime = Long.parseLong(args[++i]);
+        } else if (args[i].equals("-seed")) {
+          r = new Random(Long.parseLong(args[++i])+hostHashCode);
+        } else {
+          System.err.println(USAGE);
+          ToolRunner.printGenericCommandUsage(System.err);
+          return -1;
+        }
+      }
+    } catch (NumberFormatException e) {
+      System.err.println("Illegal parameter: " + e.getLocalizedMessage());
+      System.err.println(USAGE);
+      return -1;
+    } catch (ArrayIndexOutOfBoundsException e) {
+      // args[++i] ran past the end: the last option has no value.
+      System.err.println(
+          "Missing value for option: " + args[args.length-1]);
+      System.err.println(USAGE);
+      return -1;
+    }
+
+    if (readPr+writePr <0 || readPr+writePr>1) {
+      System.err.println(
+          "The sum of read probability and write probability must be [0, 1]: " +
+          readPr + " "+writePr);
+      return -1;
+    }
+
+    if (r==null) {
+      r = new Random(System.currentTimeMillis()+hostHashCode);
+    }
+
+    return initFileDirTables();
+  }
+  
+  /** Fill the directory table and the file table by walking the tree
+   * under root.
+   *
+   * @return 0 if both tables end up non-empty; -1 if the walk fails or
+   *         either table is empty
+   */
+  private int initFileDirTables() {
+    try {
+      initFileDirTables(root);
+    } catch (IOException walkFailure) {
+      System.err.println(walkFailure.getLocalizedMessage());
+      walkFailure.printStackTrace();
+      return -1;
+    }
+
+    int status = 0;
+    if (dirs.isEmpty()) {
+      System.err.println("The test space " + root + " is empty");
+      status = -1;
+    } else if (files.isEmpty()) {
+      System.err.println("The test space " + root + 
+          " does not have any file");
+      status = -1;
+    }
+    return status;
+  }
+  
+  /** Recursively record every directory below the specified path in the
+   * directory table, and every file below it whose name starts with
+   * "_file_" in the file table.
+   */
+  private void initFileDirTables(Path path) throws IOException {
+    FileStatus[] entries = fs.listStatus(path);
+    if (entries == null) {
+      return;
+    }
+    for (FileStatus entry : entries) {
+      Path entryPath = entry.getPath();
+      if (entry.isDir()) {
+        dirs.add(entryPath.toString());
+        initFileDirTables(entryPath);
+        continue;
+      }
+      if (entryPath.getName().startsWith(StructureGenerator.FILE_NAME_PREFIX)) {
+        files.add(entryPath.toString());
+      }
+    }
+  }
+  
+  /** Blocks until the current time in milliseconds from the epoch reaches
+   * the value given by the <code>-startTime</code> command line argument.
+   * This allows multiple instances of this program, running on clock
+   * synchronized nodes, to start at roughly the same time.
+   * Interrupts received while waiting are ignored and the wait resumes.
+   */
+  private void barrier() {
+    long remaining = startTime - System.currentTimeMillis();
+    while (remaining > 0) {
+      try {
+        Thread.sleep(remaining);
+      } catch (InterruptedException ignored) {
+        // Deliberately ignored: keep waiting for the start time.
+      }
+      remaining = startTime - System.currentTimeMillis();
+    }
+  }
+
+  /** Create a file with a length of <code>fileSize</code>.
+   * The file is filled with 'a'.
+   * The create call and the close call are timed separately and recorded
+   * under CREATE and WRITE_CLOSE. This method is called concurrently by
+   * the worker threads while the statistics arrays it updates are shared,
+   * so every update is made while holding a lock.
+   *
+   * @param file path of the file to create; an existing file is overwritten
+   * @param fileSize number of bytes to write
+   * @throws IOException if creating, writing or closing the file fails
+   */
+  private void genFile(Path file, long fileSize) throws IOException {
+    long startTime = System.currentTimeMillis();
+    FSDataOutputStream out = fs.create(file, true, 
+        getConf().getInt("io.file.buffer.size", 4096),
+        (short)getConf().getInt("dfs.replication", 3),
+        fs.getDefaultBlockSize());
+    long createTime = System.currentTimeMillis()-startTime;
+    synchronized (executionTime) {
+      executionTime[CREATE] += createTime;
+      totalNumOfOps[CREATE]++;
+    }
+
+    for (long i=0; i<fileSize; i++) {
+      out.writeByte('a');
+    }
+    startTime = System.currentTimeMillis();
+    out.close();
+    long closeTime = System.currentTimeMillis()-startTime;
+    synchronized (executionTime) {
+      executionTime[WRITE_CLOSE] += closeTime;
+      totalNumOfOps[WRITE_CLOSE]++;
+    }
+  }
+  
+  /** Main program
+   * 
+   * @param args command line arguments
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(),
+        new LoadGenerator(), args);
+    System.exit(res);
+  }
+
+}

Added: hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/StructureGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/StructureGenerator.java?rev=694458&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/StructureGenerator.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/StructureGenerator.java Thu Sep 11 13:23:09 2008
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program generates a random namespace structure with the following
+ * constraints:
+ * 1. The number of subdirectories is a random number in [minWidth, maxWidth].
+ * 2. The maximum depth of each subdirectory is a random number 
+ *    [2*maxDepth/3, maxDepth].
+ * 3. Files are randomly placed in the empty directories. The size of each
+ *    file follows Gaussian distribution.
+ * The generated namespace structure is described by two files in the output
+ * directory. Each line of the first file 
+ * contains the full name of a leaf directory.  
+ * Each line of the second file contains
+ * the full name of a file and its size, separated by a blank.
+ * 
+ * The synopsis of the command is
+ * java StructureGenerator
+    -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5.
+    -minWidth <minWidth> : minimum number of subdirectories per directory; default is 1
+    -maxWidth <maxWidth> : maximum number of subdirectories per directory; default is 5
+    -numOfFiles <#OfFiles> : the total number of files; default is 10.
+    -avgFileSize <avgFileSizeInBlocks>: average file size in blocks; default is 1.
+    -outDir <outDir>: output directory; default is the current directory.
+    -seed <seed>: random number generator seed; default is the current time.
+ */
+public class StructureGenerator {
+  // Maximum depth of the generated directory tree (-maxDepth).
+  private int maxDepth = 5;
+  // Bounds on the number of subdirectories per directory
+  // (-minWidth, -maxWidth).
+  private int minWidth = 1;
+  private int maxWidth = 5;
+  // Total number of files to place in the tree (-numOfFiles).
+  private int numOfFiles = 10;
+  // Mean of the Gaussian file size distribution, in blocks (-avgFileSize).
+  private double avgFileSize = 1;
+  // Directory that receives the two structure files (-outDir).
+  private File outDir = DEFAULT_STRUCTURE_DIRECTORY;
+  // Usage text printed when the command line cannot be parsed.
+  final static private String USAGE = "java StructureGenerator\n" +
+  	"-maxDepth <maxDepth>\n" +
+    "-minWidth <minWidth>\n" +
+    "-maxWidth <maxWidth>\n" +
+    "-numOfFiles <#OfFiles>\n" +
+    "-avgFileSize <avgFileSizeInBlocks>\n" +
+    "-outDir <outDir>\n" +
+    "-seed <seed>";
+  
+  // Random source; created in init(), seeded by -seed when given.
+  private Random r = null; 
+  
+  /** Default directory for storing file/directory structure */
+  final static File DEFAULT_STRUCTURE_DIRECTORY = new File(".");
+  /** The name of the file for storing directory structure */
+  final static String DIR_STRUCTURE_FILE_NAME = "dirStructure";
+  /** The name of the file for storing file structure */
+  final static String FILE_STRUCTURE_FILE_NAME = "fileStructure";
+  /** The name prefix for the files created by this program */
+  final static String FILE_NAME_PREFIX = "_file_";
+  
+  /**
+   * Parses the command line arguments, then builds the in-memory
+   * directory tree and writes it out, and finally places the files in
+   * the tree and writes them out.
+   *
+   * @param args command line arguments
+   * @return 0 on success, or the non-zero code returned by init()
+   */
+  public int run(String[] args) throws Exception {
+    final int status = init(args);
+    if (status == 0) {
+      genDirStructure();
+      output(new File(outDir, DIR_STRUCTURE_FILE_NAME));
+      genFileStructure();
+      outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME));
+    }
+    return status;
+  }
+
+  /** Parse the command line arguments and initialize the data.
+   *
+   * @param args command line arguments
+   * @return 0 on success; -1 if an argument is unknown, malformed,
+   *         out of range or lacks its value
+   */
+  private int init(String[] args) {
+    try {
+      for (int i = 0; i < args.length; i++) { // parse command line
+        if (args[i].equals("-maxDepth")) {
+          maxDepth = Integer.parseInt(args[++i]);
+          if (maxDepth<1) {
+            System.err.println("maxDepth must be positive: " + maxDepth);
+            return -1;
+          }
+        } else if (args[i].equals("-minWidth")) {
+          minWidth = Integer.parseInt(args[++i]);
+          if (minWidth<0) {
+            // Zero is accepted, so the message says non-negative.
+            System.err.println("minWidth must be non-negative: " + minWidth);
+            return -1;
+          }
+        } else if (args[i].equals("-maxWidth")) {
+          maxWidth = Integer.parseInt(args[++i]);
+        } else if (args[i].equals("-numOfFiles")) {
+          numOfFiles = Integer.parseInt(args[++i]);
+          if (numOfFiles<1) {
+            System.err.println("NumOfFiles must be positive: " + numOfFiles);
+            return -1;
+          }
+        } else if (args[i].equals("-avgFileSize")) {
+          avgFileSize = Double.parseDouble(args[++i]);
+          if (avgFileSize<=0) {
+            System.err.println("AvgFileSize must be positive: " + avgFileSize);
+            return -1;
+          }
+        } else if (args[i].equals("-outDir")) {
+          outDir = new File(args[++i]);
+        } else if (args[i].equals("-seed")) {
+          r = new Random(Long.parseLong(args[++i]));
+        } else {
+          System.err.println(USAGE);
+          ToolRunner.printGenericCommandUsage(System.err);
+          return -1;
+        }
+      }
+    } catch (NumberFormatException e) {
+      System.err.println("Illegal parameter: " + e.getLocalizedMessage());
+      System.err.println(USAGE);
+      return -1;
+    } catch (ArrayIndexOutOfBoundsException e) {
+      // args[++i] ran past the end: the last option has no value.
+      System.err.println(
+          "Missing value for option: " + args[args.length-1]);
+      System.err.println(USAGE);
+      return -1;
+    }
+
+    // Checked after parsing because the two widths may come in any order.
+    if (maxWidth < minWidth) {
+      System.err.println(
+          "maxWidth must not be smaller than minWidth: " + maxWidth);
+      return -1;
+    }
+
+    if (r==null) {
+      r = new Random();
+    }
+    return 0;
+  }
+  
+  /** In memory representation of a directory */
+  private static class INode {
+    private String name;
+    private List<INode> children = new ArrayList<INode>();
+    
+    /** Constructor */
+    private INode(String name) {
+      this.name = name;
+    }
+    
+    /** Add a child (subdir/file) */
+    private void addChild(INode child) {
+      children.add(child);
+    }
+    
+    /** Output the subtree rooted at the current node. 
+     * Only the leaves are printed.
+     */
+    private void output(PrintStream out, String prefix) {
+      prefix = prefix==null?name:prefix+"/"+name;
+      if (children.isEmpty()) {
+        out.println(prefix);
+      } else {
+        for (INode child : children) {
+          child.output(out, prefix);
+        }
+      }
+    }
+    
+    /** Output the files in the subtree rooted at this node */
+    protected void outputFiles(PrintStream out, String prefix) {
+      prefix = prefix==null?name:prefix+"/"+name;
+      for (INode child : children) {
+        child.outputFiles(out, prefix);
+      }
+    }
+    
+    /** Add all the leaves in the subtree to the input list */
+    private void getLeaves(List<INode> leaves) {
+      if (children.isEmpty()) {
+        leaves.add(this);
+      } else {
+        for (INode child : children) {
+          child.getLeaves(leaves);
+        }
+      }
+    }
+  }
+  
+  /** In memory representation of a file */
+  private static class FileINode extends INode {
+    private double numOfBlocks;
+
+    /** constructor */
+    private FileINode(String name, double numOfBlocks) {
+      super(name);
+      this.numOfBlocks = numOfBlocks;
+    }
+    
+    /** Output a file attribute */
+    protected void outputFiles(PrintStream out, String prefix) {
+      prefix = (prefix == null)?super.name: prefix + "/"+super.name;
+      out.println(prefix + " " + numOfBlocks);
+    }
+  }
+
+  private INode root;
+
+  /** Builds the whole directory tree, at most <code>maxDepth</code>
+   * levels deep, under a root node with an empty name.
+   */
+  private void genDirStructure() {
+    this.root = genDirStructure("", this.maxDepth);
+  }
+  
+  /** Generate a directory tree rooted at <code>rootName</code>.
+   * The number of subtrees is drawn from [minWidth, maxWidth].
+   * After one level is used up for the children, each child receives a
+   * maximum depth drawn from [2*d/3, d], where d is the remaining depth.
+   * A node whose maximum depth is zero gets no children.
+   */
+  private INode genDirStructure(String rootName, int maxDepth) {
+    INode dir = new INode(rootName);
+    if (maxDepth <= 0) {
+      return dir;
+    }
+
+    int remainingDepth = maxDepth - 1;
+    int minDepth = remainingDepth*2/3;
+    // Decide how many subdirectories this directory gets
+    int subDirCount = minWidth + r.nextInt(maxWidth-minWidth+1);
+    for (int i = 0; i < subDirCount; i++) {
+      int childDepth = 0;
+      if (remainingDepth != 0) {
+        childDepth = r.nextInt(remainingDepth-minDepth+1)+minDepth;
+      }
+      dir.addChild(genDirStructure("dir"+i, childDepth));
+    }
+    return dir;
+  }
+  
+  /** Returns a new list holding every leaf node of the tree. */
+  private List<INode> getLeaves() {
+    List<INode> collected = new ArrayList<INode>();
+    this.root.getLeaves(collected);
+    return collected;
+  }
+  
+  /** Decides where each file goes and how long it is.
+   * The empty directories of the tree are collected once up front.
+   * Each file is then placed in one of them, chosen at random, with a
+   * length drawn from a Gaussian distribution centred on avgFileSize;
+   * negative samples are drawn again.
+   */
+  private void genFileStructure() {
+    List<INode> emptyDirs = getLeaves();
+    int dirCount = emptyDirs.size();
+    for (int fileIndex = 0; fileIndex < numOfFiles; fileIndex++) {
+      INode parent = emptyDirs.get(r.nextInt(dirCount));
+      double sizeInBlocks = r.nextGaussian()+avgFileSize;
+      while (sizeInBlocks < 0) {
+        sizeInBlocks = r.nextGaussian()+avgFileSize;
+      }
+      parent.addChild(
+          new FileINode(FILE_NAME_PREFIX+fileIndex, sizeInBlocks));
+    }
+  }
+  
+  /** Output directory structure to a file, each line of the file
+   * contains the directory name. Only empty directory names are printed.
+   * The stream is closed even if the traversal fails.
+   *
+   * @param outFile the file to write to
+   * @throws FileNotFoundException if the file cannot be opened for writing
+   */
+  private void output(File outFile) throws FileNotFoundException {
+    System.out.println("Printing to " + outFile.toString());
+    PrintStream out = new PrintStream(outFile);
+    try {
+      root.output(out, null);
+    } finally {
+      out.close();
+    }
+  }
+  
+  /** Output all files' attributes to a file, each line of the output file
+   * contains a file name and its length.
+   * The stream is closed even if the traversal fails.
+   *
+   * @param outFile the file to write to
+   * @throws FileNotFoundException if the file cannot be opened for writing
+   */
+  private void outputFiles(File outFile) throws FileNotFoundException {
+    System.out.println("Printing to " + outFile.toString());
+    PrintStream out = new PrintStream(outFile);
+    try {
+      root.outputFiles(out, null);
+    } finally {
+      out.close();
+    }
+  }
+  
+  /**
+   * Main program
+   * @param args Command line arguments
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    StructureGenerator sg = new StructureGenerator();
+    System.exit(sg.run(args));
+  }
+}

Added: hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/TestLoadGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/TestLoadGenerator.java?rev=694458&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/TestLoadGenerator.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/loadGenerator/TestLoadGenerator.java Thu Sep 11 13:23:09 2008
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+import junit.framework.TestCase;
+/**
+ * This class tests if a balancer schedules tasks correctly.
+ */
+public class TestLoadGenerator extends TestCase {
+  private static final Configuration CONF = new Configuration();
+  private static final int DEFAULT_BLOCK_SIZE = 10;
+  private static final String OUT_DIR = 
+    System.getProperty("test.build.data","build/test/data");
+  private static final File DIR_STRUCTURE_FILE = 
+    new File(OUT_DIR, StructureGenerator.DIR_STRUCTURE_FILE_NAME);
+  private static final File FILE_STRUCTURE_FILE =
+    new File(OUT_DIR, StructureGenerator.FILE_STRUCTURE_FILE_NAME);
+  private static final String DIR_STRUCTURE_FIRST_LINE = "/dir0";
+  private static final String DIR_STRUCTURE_SECOND_LINE = "/dir1";
+  private static final String FILE_STRUCTURE_FIRST_LINE =
+    "/dir0/_file_0 0.3754598635933768";
+  private static final String FILE_STRUCTURE_SECOND_LINE =
+    "/dir1/_file_1 1.4729310851145203";
+  
+
+  static {
+    CONF.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
+    CONF.setInt("io.bytes.per.checksum", DEFAULT_BLOCK_SIZE);
+    CONF.setLong("dfs.heartbeat.interval", 1L);
+  }
+
+  /** Test if the structure generator works fine */ 
+  public void testStructureGenerator() throws Exception {
+    StructureGenerator sg = new StructureGenerator();
+    String[] args = new String[]{"-maxDepth", "2", "-minWidth", "1",
+        "-maxWidth", "2", "-numOfFiles", "2",
+        "-avgFileSize", "1", "-outDir", OUT_DIR, "-seed", "1"};
+    
+    final int MAX_DEPTH = 1;
+    final int MIN_WIDTH = 3;
+    final int MAX_WIDTH = 5;
+    final int NUM_OF_FILES = 7;
+    final int AVG_FILE_SIZE = 9;
+    final int SEED = 13;
+    try {
+      // successful case
+      assertEquals(0, sg.run(args));
+      BufferedReader in = new BufferedReader(new FileReader(DIR_STRUCTURE_FILE));
+      assertEquals(DIR_STRUCTURE_FIRST_LINE, in.readLine());
+      assertEquals(DIR_STRUCTURE_SECOND_LINE, in.readLine());
+      assertEquals(null, in.readLine());
+      in.close();
+      
+      in = new BufferedReader(new FileReader(FILE_STRUCTURE_FILE));
+      assertEquals(FILE_STRUCTURE_FIRST_LINE, in.readLine());
+      assertEquals(FILE_STRUCTURE_SECOND_LINE, in.readLine());
+      assertEquals(null, in.readLine());
+      in.close();
+
+      String oldArg = args[MAX_DEPTH];
+      args[MAX_DEPTH] = "0";
+      assertEquals(-1, sg.run(args));
+      args[MAX_DEPTH] = oldArg;
+      
+      oldArg = args[MIN_WIDTH];
+      args[MIN_WIDTH] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[MIN_WIDTH] = oldArg;
+      
+      oldArg = args[MAX_WIDTH];
+      args[MAX_WIDTH] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[MAX_WIDTH] = oldArg;
+      
+      oldArg = args[NUM_OF_FILES];
+      args[NUM_OF_FILES] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[NUM_OF_FILES] = oldArg;
+      
+      oldArg = args[NUM_OF_FILES];
+      args[NUM_OF_FILES] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[NUM_OF_FILES] = oldArg;
+      
+      oldArg = args[AVG_FILE_SIZE];
+      args[AVG_FILE_SIZE] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[AVG_FILE_SIZE] = oldArg;
+      
+      oldArg = args[SEED];
+      args[SEED] = "34.d4";
+      assertEquals(-1, sg.run(args));
+      args[SEED] = oldArg;
+    } finally {
+      DIR_STRUCTURE_FILE.delete();
+      FILE_STRUCTURE_FILE.delete();
+    }
+  }
+
+  /** Test if the load generator works fine */
+  public void testLoadGenerator() throws Exception {
+    final String TEST_SPACE_ROOT = "/test";
+
+    FileWriter writer = new FileWriter(DIR_STRUCTURE_FILE);
+    writer.write(DIR_STRUCTURE_FIRST_LINE+"\n");
+    writer.write(DIR_STRUCTURE_SECOND_LINE+"\n");
+    writer.close();
+    
+    writer = new FileWriter(FILE_STRUCTURE_FILE);
+    writer.write(FILE_STRUCTURE_FIRST_LINE+"\n");
+    writer.write(FILE_STRUCTURE_SECOND_LINE+"\n");
+    writer.close();
+    
+    MiniDFSCluster cluster = new MiniDFSCluster(CONF, 3, true, null);
+    cluster.waitActive();
+    
+    try {
+      DataGenerator dg = new DataGenerator();
+      dg.setConf(CONF);
+      String [] args = new String[] {"-inDir", OUT_DIR, "-root", TEST_SPACE_ROOT};
+      assertEquals(0, dg.run(args));
+
+      final int READ_PROBABILITY = 1;
+      final int WRITE_PROBABILITY = 3;
+      final int MAX_DELAY_BETWEEN_OPS = 7;
+      final int NUM_OF_THREADS = 9;
+      final int START_TIME = 11;
+      final int ELAPSED_TIME = 13;
+      
+      LoadGenerator lg = new LoadGenerator();
+      lg.setConf(CONF);
+      args = new String[] {"-readProbability", "0.3", "-writeProbability", "0.3",
+          "-root", TEST_SPACE_ROOT, "-maxDelayBetweenOps", "0",
+          "-numOfThreads", "1", "-startTime", 
+          Long.toString(System.currentTimeMillis()), "-elapsedTime", "10"};
+      
+      assertEquals(0, lg.run(args));
+
+      String oldArg = args[READ_PROBABILITY];
+      args[READ_PROBABILITY] = "1.1";
+      assertEquals(-1, lg.run(args));
+      args[READ_PROBABILITY] = "-1.1";
+      assertEquals(-1, lg.run(args));
+      args[READ_PROBABILITY] = oldArg;
+
+      oldArg = args[WRITE_PROBABILITY];
+      args[WRITE_PROBABILITY] = "1.1";
+      assertEquals(-1, lg.run(args));
+      args[WRITE_PROBABILITY] = "-1.1";
+      assertEquals(-1, lg.run(args));
+      args[WRITE_PROBABILITY] = "0.9";
+      assertEquals(-1, lg.run(args));
+      args[READ_PROBABILITY] = oldArg;
+
+      oldArg = args[MAX_DELAY_BETWEEN_OPS];
+      args[MAX_DELAY_BETWEEN_OPS] = "1.x1";
+      assertEquals(-1, lg.run(args));
+      args[MAX_DELAY_BETWEEN_OPS] = oldArg;
+      
+      oldArg = args[MAX_DELAY_BETWEEN_OPS];
+      args[MAX_DELAY_BETWEEN_OPS] = "1.x1";
+      assertEquals(-1, lg.run(args));
+      args[MAX_DELAY_BETWEEN_OPS] = oldArg;
+      
+      oldArg = args[NUM_OF_THREADS];
+      args[NUM_OF_THREADS] = "-1";
+      assertEquals(-1, lg.run(args));
+      args[NUM_OF_THREADS] = oldArg;
+      
+      oldArg = args[START_TIME];
+      args[START_TIME] = "-1";
+      assertEquals(-1, lg.run(args));
+      args[START_TIME] = oldArg;
+
+      oldArg = args[ELAPSED_TIME];
+      args[ELAPSED_TIME] = "-1";
+      assertEquals(-1, lg.run(args));
+      args[ELAPSED_TIME] = oldArg;
+    } finally {
+      cluster.shutdown();
+      DIR_STRUCTURE_FILE.delete();
+      FILE_STRUCTURE_FILE.delete();
+    }
+  }
+  
+  /**
+   * @param args
+   */
+  public static void main(String[] args) throws Exception {
+    TestLoadGenerator loadGeneratorTest = new TestLoadGenerator();
+    loadGeneratorTest.testStructureGenerator();
+    loadGeneratorTest.testLoadGenerator();
+  }
+}