You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by yh...@apache.org on 2009/05/28 08:37:33 UTC

svn commit: r779450 - in /hadoop/core/branches/branch-0.20: ./ src/core/org/apache/hadoop/util/ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/util/

Author: yhemanth
Date: Thu May 28 06:37:33 2009
New Revision: 779450

URL: http://svn.apache.org/viewvc?rev=779450&view=rev
Log:
HADOOP-5883. Fixed tasktracker memory monitoring to account for momentary spurts in memory usage due to java's fork() model. Contributed by Hemanth Yamijala.

Modified:
    hadoop/core/branches/branch-0.20/CHANGES.txt
    hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java
    hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java

Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=779450&r1=779449&r2=779450&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Thu May 28 06:37:33 2009
@@ -89,6 +89,10 @@
     HADOOP-4626. Correct the API links in hdfs forrest doc so that they
     point to the same version of hadoop.  (szetszwo)
 
+    HADOOP-5883. Fixed tasktracker memory monitoring to account for
+    momentary spurts in memory usage due to java's fork() model.
+    (yhemanth)
+
 Release 0.20.0 - 2009-04-15
 
   INCOMPATIBLE CHANGES

Modified: hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java?rev=779450&r1=779449&r2=779450&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java (original)
+++ hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java Thu May 28 06:37:33 2009
@@ -52,14 +52,23 @@
   private static final Pattern PROCFS_STAT_FILE_FORMAT = Pattern
       .compile("^([0-9-]+)\\s([^\\s]+)\\s[^\\s]\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+\\s){16}([0-9]+)(\\s[0-9-]+){16}");
 
+  // root directory of the procfs file system; kept configurable so that
+  // tests can point it at a fake directory.
+  private String procfsDir;
+  
   private Integer pid = -1;
 
   private Map<Integer, ProcessInfo> processTree = new HashMap<Integer, ProcessInfo>();
 
   public ProcfsBasedProcessTree(String pid) {
-    this.pid = getValidPID(pid);
+    this(pid, PROCFS);
   }
 
+  public ProcfsBasedProcessTree(String pid, String procfsDir) {
+    this.pid = getValidPID(pid);
+    this.procfsDir = procfsDir;
+  }
+  
   public void setSigKillInterval(long interval) {
     sleepTimeBeforeSigKill = interval;
   }
@@ -96,13 +105,17 @@
       List<Integer> processList = getProcessList();
 
       Map<Integer, ProcessInfo> allProcessInfo = new HashMap<Integer, ProcessInfo>();
+      
+      // cache the processTree to get the age for processes
+      Map<Integer, ProcessInfo> oldProcs = 
+              new HashMap<Integer, ProcessInfo>(processTree);
       processTree.clear();
 
       ProcessInfo me = null;
       for (Integer proc : processList) {
         // Get information for each process
         ProcessInfo pInfo = new ProcessInfo(proc);
-        if (constructProcessInfo(pInfo) != null) {
+        if (constructProcessInfo(pInfo, procfsDir) != null) {
           allProcessInfo.put(proc, pInfo);
           if (proc.equals(this.pid)) {
             me = pInfo; // cache 'me'
@@ -138,6 +151,16 @@
         pInfoQueue.addAll(pInfo.getChildren());
       }
 
+      // update age values.
+      for (Map.Entry<Integer, ProcessInfo> procs : processTree.entrySet()) {
+        ProcessInfo oldInfo = oldProcs.get(procs.getKey());
+        if (oldInfo != null) {
+          if (procs.getValue() != null) {
+            procs.getValue().updateAge(oldInfo);  
+          }
+        }
+      }
+
       if (LOG.isDebugEnabled()) {
         // Log.debug the ProcfsBasedProcessTree
         LOG.debug(this.toString());
@@ -197,9 +220,23 @@
    * @return cumulative virtual memory used by the process-tree in bytes.
    */
   public long getCumulativeVmem() {
+    // include all processes.. all processes will be older than 0.
+    return getCumulativeVmem(0);
+  }
+
+  /**
+   * Get the cumulative virtual memory used by all the processes in the
+   * process-tree that are older than the passed in age.
+   * 
+   * @param olderThanAge processes above this age are included in the
+   *                      memory addition
+   * @return cumulative virtual memory used by the process-tree in bytes,
+   *          for processes older than this age.
+   */
+  public long getCumulativeVmem(int olderThanAge) {
     long total = 0;
     for (ProcessInfo p : processTree.values()) {
-      if (p != null) {
+      if ((p != null) && (p.getAge() > olderThanAge)) {
         total += p.getVmem();
       }
     }
@@ -268,13 +305,13 @@
    * Get the list of all processes in the system.
    */
   private List<Integer> getProcessList() {
-    String[] processDirs = (new File(PROCFS)).list();
+    String[] processDirs = (new File(procfsDir)).list();
     List<Integer> processList = new ArrayList<Integer>();
 
     for (String dir : processDirs) {
       try {
         int pd = Integer.parseInt(dir);
-        if ((new File(PROCFS + dir)).isDirectory()) {
+        if ((new File(procfsDir, dir)).isDirectory()) {
           processList.add(Integer.valueOf(pd));
         }
       } catch (NumberFormatException n) {
@@ -292,12 +329,29 @@
    * same. Returns null on failing to read from procfs,
    */
   private ProcessInfo constructProcessInfo(ProcessInfo pinfo) {
+    return constructProcessInfo(pinfo, PROCFS);
+  }
+
+  /**
+   * Construct the ProcessInfo using the process' PID and procfs rooted at the
+   * specified directory and return the same. It is provided mainly to assist
+   * testing purposes.
+   * 
+   * Returns null on failing to read from procfs.
+   *
+   * @param pinfo ProcessInfo that needs to be updated
+   * @param procfsDir root of the proc file system
+   * @return updated ProcessInfo, null on errors.
+   */
+  private ProcessInfo constructProcessInfo(ProcessInfo pinfo, 
+                                                    String procfsDir) {
     ProcessInfo ret = null;
-    // Read "/proc/<pid>/stat" file
+    // Read "procfsDir/<pid>/stat" file
     BufferedReader in = null;
     FileReader fReader = null;
     try {
-      fReader = new FileReader(PROCFS + pinfo.getPid() + "/stat");
+      File pidDir = new File(procfsDir, String.valueOf(pinfo.getPid()));
+      fReader = new FileReader(new File(pidDir, "/stat"));
       in = new BufferedReader(fReader);
     } catch (FileNotFoundException f) {
       // The process vanished in the interim!
@@ -311,7 +365,7 @@
       boolean mat = m.find();
       if (mat) {
         // Set ( name ) ( ppid ) ( pgrpId ) (session ) (vsize )
-        pinfo.update(m.group(2), Integer.parseInt(m.group(3)), Integer
+        pinfo.updateProcessInfo(m.group(2), Integer.parseInt(m.group(3)), Integer
             .parseInt(m.group(4)), Integer.parseInt(m.group(5)), Long
             .parseLong(m.group(7)));
       }
@@ -338,7 +392,7 @@
 
     return ret;
   }
-
+  
   /**
    * Is the process with PID pid still alive?
    */
@@ -391,7 +445,6 @@
       }
     }
   }
-
   /**
    * Returns a string printing PIDs of process present in the
    * ProcfsBasedProcessTree. Output format : [pid pid ..]
@@ -417,10 +470,14 @@
     private Integer ppid; // parent process-id
     private Integer sessionId; // session-id
     private Long vmem; // virtual memory usage
+    // how many times has this process been seen alive
+    private int age; 
     private List<ProcessInfo> children = new ArrayList<ProcessInfo>(); // list of children
 
     public ProcessInfo(int pid) {
       this.pid = Integer.valueOf(pid);
+      // seeing this the first time.
+      this.age = 1;
     }
 
     public Integer getPid() {
@@ -447,6 +504,10 @@
       return vmem;
     }
 
+    public int getAge() {
+      return age;
+    }
+    
     public boolean isParent(ProcessInfo p) {
       if (pid.equals(p.getPpid())) {
         return true;
@@ -454,7 +515,7 @@
       return false;
     }
 
-    public void update(String name, Integer ppid, Integer pgrpId,
+    public void updateProcessInfo(String name, Integer ppid, Integer pgrpId,
         Integer sessionId, Long vmem) {
       this.name = name;
       this.ppid = ppid;
@@ -463,6 +524,10 @@
       this.vmem = vmem;
     }
 
+    public void updateAge(ProcessInfo oldInfo) {
+      this.age = oldInfo.age + 1;
+    }
+    
     public boolean addChild(ProcessInfo p) {
       return children.add(p);
     }

Modified: hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java?rev=779450&r1=779449&r2=779450&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java (original)
+++ hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java Thu May 28 06:37:33 2009
@@ -51,21 +51,34 @@
   private List<TaskAttemptID> tasksToBeRemoved;
 
   public TaskMemoryManagerThread(TaskTracker taskTracker) {
+    
+    this(taskTracker.getTotalMemoryAllottedForTasksOnTT() * 1024 * 1024L,
+      taskTracker.getJobConf().getLong(
+        "mapred.tasktracker.taskmemorymanager.monitoring-interval", 
+        5000L),
+      taskTracker.getJobConf().getLong(
+        "mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill",
+        ProcfsBasedProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL));
+
     this.taskTracker = taskTracker;
+  }
+
+  // mainly for test purposes. note that the tasktracker variable is
+  // not set here.
+  TaskMemoryManagerThread(long maxMemoryAllowedForAllTasks,
+                            long monitoringInterval,
+                            long sleepTimeBeforeSigKill) {
     setName(this.getClass().getName());
 
     processTreeInfoMap = new HashMap<TaskAttemptID, ProcessTreeInfo>();
     tasksToBeAdded = new HashMap<TaskAttemptID, ProcessTreeInfo>();
     tasksToBeRemoved = new ArrayList<TaskAttemptID>();
 
-    maxMemoryAllowedForAllTasks =
-        taskTracker.getTotalMemoryAllottedForTasksOnTT() * 1024 * 1024L;
+    this.maxMemoryAllowedForAllTasks = maxMemoryAllowedForAllTasks;
 
-    monitoringInterval = taskTracker.getJobConf().getLong(
-        "mapred.tasktracker.taskmemorymanager.monitoring-interval", 5000L);
-    sleepTimeBeforeSigKill = taskTracker.getJobConf().getLong(
-        "mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill",
-        ProcfsBasedProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL);
+    this.monitoringInterval = monitoringInterval;
+    
+    this.sleepTimeBeforeSigKill = sleepTimeBeforeSigKill;
   }
 
   public void addTask(TaskAttemptID tid, long memLimit, String pidFile) {
@@ -200,12 +213,15 @@
           ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of
           // updated state
           long currentMemUsage = pTree.getCumulativeVmem();
+          // as processes begin with an age 1, we want to see if there 
+          // are processes more than 1 iteration old.
+          long curMemUsageOfAgedProcesses = pTree.getCumulativeVmem(1);
           long limit = ptInfo.getMemLimit();
           LOG.info("Memory usage of ProcessTree " + pId + " :"
               + currentMemUsage + "bytes. Limit : " + limit + "bytes");
 
-          if (limit != JobConf.DISABLED_MEMORY_LIMIT
-              && currentMemUsage > limit) {
+          if (isProcessTreeOverLimit(tid.toString(), currentMemUsage, 
+                                      curMemUsageOfAgedProcesses, limit)) {
             // Task (the root process) is still alive and overflowing memory.
             // Clean up.
             String msg =
@@ -254,6 +270,65 @@
     }
   }
 
+  /**
+   * Check whether a task's process tree's current memory usage is over limit.
+   * 
+   * When a java process exec's a program, it could momentarily account for
+   * double the size of its memory, because the JVM does a fork()+exec()
+   * which at fork time creates a copy of the parent's memory. If the 
+   * monitoring thread samples the task tree at that very instant, it could
+   * assume the tree is over limit and kill it, through no fault of the
+   * process itself.
+   * 
+   * We counter this problem by employing a heuristic check:
+   * - if a process tree uses more than twice the memory limit, it is
+   * killed immediately
+   * - if the processes in the tree that are older than one monitoring
+   * iteration together exceed the memory limit, it is killed. Else it is
+   * given the benefit of doubt to lie around for one more iteration.
+   * A limit of JobConf.DISABLED_MEMORY_LIMIT is never considered exceeded.
+   * 
+   * @param tId Task Id for the task tree
+   * @param currentMemUsage Memory usage of a task tree
+   * @param curMemUsageOfAgedProcesses Memory usage of processes older than
+   *                                    an iteration in a task tree
+   * @param limit The limit specified for the task
+   * @return true if the memory usage is more than twice the specified limit,
+   *              or if processes in the tree, older than one monitoring
+   *              iteration, exceed the memory limit. False otherwise.
+   */
+  boolean isProcessTreeOverLimit(String tId, long currentMemUsage, 
+                                  long curMemUsageOfAgedProcesses, long limit) {
+    // tasks without a memory limit must never be reported as over limit.
+    if (limit == JobConf.DISABLED_MEMORY_LIMIT) {
+      return false;
+    }
+    if (currentMemUsage > (2*limit)) {
+      LOG.warn("Process tree for task: " + tId + " running over twice " +
+                "the configured limit. Limit=" + limit + 
+                ", current usage = " + currentMemUsage);
+      return true;
+    }
+    if (curMemUsageOfAgedProcesses > limit) {
+      LOG.warn("Process tree for task: " + tId + " has processes older than 1 " +
+          "iteration running over the configured limit. Limit=" + limit + 
+          ", current usage = " + curMemUsageOfAgedProcesses);
+      return true;
+    }
+    return false;
+  }
+
+  // method provided just for easy testing purposes
+  boolean isProcessTreeOverLimit(ProcfsBasedProcessTree pTree, 
+                                    String tId, long limit) {
+    long currentMemUsage = pTree.getCumulativeVmem();
+    // as processes begin with an age 1, we want to see if there are processes
+    // more than 1 iteration old.
+    long curMemUsageOfAgedProcesses = pTree.getCumulativeVmem(1);
+    return isProcessTreeOverLimit(tId, currentMemUsage, 
+                                  curMemUsageOfAgedProcesses, limit);
+  }
+
   private void killTasksWithLeastProgress(long memoryStillInUsage) {
 
     List<TaskAttemptID> tasksToKill = new ArrayList<TaskAttemptID>();

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java?rev=779450&r1=779449&r2=779450&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java Thu May 28 06:37:33 2009
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.mapred;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -27,12 +28,15 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.examples.SleepJob;
 import org.apache.hadoop.util.MemoryCalculatorPlugin;
 import org.apache.hadoop.util.ProcfsBasedProcessTree;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.TestProcfsBasedProcessTree;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.hadoop.fs.FileSystem;
 
@@ -45,6 +49,9 @@
 
   private static final Log LOG =
       LogFactory.getLog(TestTaskTrackerMemoryManager.class);
+  private static String TEST_ROOT_DIR = new Path(System.getProperty(
+		    "test.build.data", "/tmp")).toString().replace(' ', '+');
+
   private MiniMRCluster miniMRCluster;
 
   private String taskOverLimitPatternString =
@@ -350,4 +357,91 @@
     // Test succeeded, kill the job.
     job.killJob();
   }
+  
+  /**
+   * Test to verify the check for whether a process tree is over limit or not.
+   * @throws IOException if there was a problem setting up the
+   *                      fake procfs directories or files.
+   */
+  public void testProcessTreeLimits() throws IOException {
+    
+    // set up a dummy proc file system
+    File procfsRootDir = new File(TEST_ROOT_DIR, "proc");
+    String[] pids = { "100", "200", "300", "400", "500", "600", "700" };
+    try {
+      TestProcfsBasedProcessTree.setupProcfsRootDir(procfsRootDir);
+      
+      // create pid dirs.
+      TestProcfsBasedProcessTree.setupPidDirs(procfsRootDir, pids);
+      
+      // create process infos.
+      TestProcfsBasedProcessTree.ProcessStatInfo[] procs =
+          new TestProcfsBasedProcessTree.ProcessStatInfo[7];
+
+      // assume pids 100, 500 are in 1 tree 
+      // 200,300,400 are in another
+      // 600,700 are in a third
+      procs[0] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"100", "proc1", "1", "100", "100", "100000"});
+      procs[1] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"200", "proc2", "1", "200", "200", "200000"});
+      procs[2] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"300", "proc3", "200", "200", "200", "300000"});
+      procs[3] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"400", "proc4", "200", "200", "200", "400000"});
+      procs[4] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"500", "proc5", "100", "100", "100", "1500000"});
+      procs[5] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"600", "proc6", "1", "600", "600", "100000"});
+      procs[6] = new TestProcfsBasedProcessTree.ProcessStatInfo(
+          new String[] {"700", "proc7", "600", "600", "600", "100000"});
+      // write stat files.
+      TestProcfsBasedProcessTree.writeStatFiles(procfsRootDir, pids, procs);
+
+      // vmem limit
+      long limit = 700000;
+      
+      // Create TaskMemoryMonitorThread
+      TaskMemoryManagerThread test = new TaskMemoryManagerThread(1000000L,
+                                                                5000L,
+                                                                5000L);
+      // create process trees
+      // tree rooted at 100 is over limit immediately, as it uses
+      // more than twice the mem limit.
+      ProcfsBasedProcessTree pTree = new ProcfsBasedProcessTree(
+                                          "100", 
+                                          procfsRootDir.getAbsolutePath());
+      pTree.getProcessTree();
+      assertTrue("tree rooted at 100 should be over limit " +
+                    "after first iteration.",
+                  test.isProcessTreeOverLimit(pTree, "dummyId", limit));
+      
+      // the tree rooted at 200 is initially below limit.
+      pTree = new ProcfsBasedProcessTree("200", 
+                                          procfsRootDir.getAbsolutePath());
+      pTree.getProcessTree();
+      assertFalse("tree rooted at 200 shouldn't be over limit " +
+                    "after one iteration.",
+                  test.isProcessTreeOverLimit(pTree, "dummyId", limit));
+      // second iteration - now the tree has been over limit twice,
+      // hence it should be declared over limit.
+      pTree.getProcessTree();
+      assertTrue("tree rooted at 200 should be over limit after 2 iterations",
+                  test.isProcessTreeOverLimit(pTree, "dummyId", limit));
+      
+      // the tree rooted at 600 is never over limit.
+      pTree = new ProcfsBasedProcessTree("600", 
+                                          procfsRootDir.getAbsolutePath());
+      pTree.getProcessTree();
+      assertFalse("tree rooted at 600 should never be over limit.",
+                    test.isProcessTreeOverLimit(pTree, "dummyId", limit));
+      
+      // another iteration does not make any difference.
+      pTree.getProcessTree();
+      assertFalse("tree rooted at 600 should never be over limit.",
+                    test.isProcessTreeOverLimit(pTree, "dummyId", limit));
+    } finally {
+      FileUtil.fullyDelete(procfsRootDir);
+    }
+  }
 }

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java?rev=779450&r1=779449&r2=779450&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java Thu May 28 06:37:33 2009
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.util;
 
+import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
@@ -25,6 +26,8 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 
@@ -34,6 +37,9 @@
 
   private static final Log LOG = LogFactory
       .getLog(TestProcfsBasedProcessTree.class);
+  private static String TEST_ROOT_DIR = new Path(System.getProperty(
+      "test.build.data", "/tmp")).toString().replace(' ', '+');
+
   private ShellCommandExecutor shexec = null;
   private String pidFile;
   private String shellScript;
@@ -155,4 +161,234 @@
         .getCumulativeVmem() == 0);
     assertTrue(p.toString().equals("[ ]"));
   }
+  
+  public static class ProcessStatInfo {
+    // sample stat in a single line : 3910 (gpm) S 1 3910 3910 0 -1 4194624 
+    // 83 0 0 0 0 0 0 0 16 0 1 0 7852 2408448 88 4294967295 134512640 
+    // 134590050 3220521392 3220520036 10975138 0 0 4096 134234626 
+    // 4294967295 0 0 17 1 0 0
+    String pid;
+    String name;
+    String ppid;
+    String pgrpId;
+    String session;
+    String vmem;
+    
+    public ProcessStatInfo(String[] statEntries) {
+      pid = statEntries[0];
+      name = statEntries[1];
+      ppid = statEntries[2];
+      pgrpId = statEntries[3];
+      session = statEntries[4];
+      vmem = statEntries[5];
+    }
+    
+    // construct a line that mimics the procfs stat file.
+    // all unused numerical entries are set to 0.
+    public String getStatLine() {
+      return String.format("%s (%s) S %s %s %s 0 0 0" +
+                      " 0 0 0 0 0 0 0 0 0 0 0 0 0 %s 0 0 0" +
+                      " 0 0 0 0 0 0 0 0" +
+                      " 0 0 0 0 0", 
+                      pid, name, ppid, pgrpId, session, vmem);
+    }
+  }
+  
+  /**
+   * A basic test that creates a few process directories and writes
+   * stat files. Verifies that the virtual memory is correctly  
+   * computed.
+   * @throws IOException if there was a problem setting up the
+   *                      fake procfs directories or files.
+   */
+  public void testVirtualMemoryForProcessTree() throws IOException {
+
+    // test processes
+    String[] pids = { "100", "200", "300", "400" };
+    // create the fake procfs root directory. 
+    File procfsRootDir = new File(TEST_ROOT_DIR, "proc");
+
+    try {
+      setupProcfsRootDir(procfsRootDir);
+      setupPidDirs(procfsRootDir, pids);
+      
+      // create stat objects.
+      // assuming processes 100, 200, 300 are in tree and 400 is not.
+      ProcessStatInfo[] procInfos = new ProcessStatInfo[4];
+      procInfos[0] = new ProcessStatInfo(new String[] 
+                                  {"100", "proc1", "1", "100", "100", "100000"});
+      procInfos[1] = new ProcessStatInfo(new String[] 
+                                  {"200", "proc2", "100", "100", "100", "200000"});
+      procInfos[2] = new ProcessStatInfo(new String[] 
+                                  {"300", "proc3", "200", "100", "100", "300000"});
+      procInfos[3] = new ProcessStatInfo(new String[] 
+                                  {"400", "proc4", "1", "400", "400", "400000"});
+      
+      writeStatFiles(procfsRootDir, pids, procInfos);
+      
+      // crank up the process tree class.
+      ProcfsBasedProcessTree processTree = 
+          new ProcfsBasedProcessTree("100", procfsRootDir.getAbsolutePath());
+      // build the process tree.
+      processTree.getProcessTree();
+      
+      // verify cumulative memory
+      assertEquals("Cumulative memory does not match", 
+              Long.parseLong("600000"), processTree.getCumulativeVmem());
+    } finally {
+      FileUtil.fullyDelete(procfsRootDir);
+    }
+  }
+  
+  /**
+   * Tests that cumulative memory is computed only for
+   * processes older than a given age.
+   * @throws IOException if there was a problem setting up the
+   *                      fake procfs directories or files.
+   */
+  public void testVMemForOlderProcesses() throws IOException {
+    // initial list of processes
+    String[] pids = { "100", "200", "300", "400" };
+    // create the fake procfs root directory. 
+    File procfsRootDir = new File(TEST_ROOT_DIR, "proc");
+
+    try {
+      setupProcfsRootDir(procfsRootDir);
+      setupPidDirs(procfsRootDir, pids);
+      
+      // create stat objects.
+      // assuming 100, 200 and 400 are in tree, 300 is not.
+      ProcessStatInfo[] procInfos = new ProcessStatInfo[4];
+      procInfos[0] = new ProcessStatInfo(new String[] 
+                                  {"100", "proc1", "1", "100", "100", "100000"});
+      procInfos[1] = new ProcessStatInfo(new String[] 
+                                  {"200", "proc2", "100", "100", "100", "200000"});
+      procInfos[2] = new ProcessStatInfo(new String[] 
+                                  {"300", "proc3", "1", "300", "300", "300000"});
+      procInfos[3] = new ProcessStatInfo(new String[] 
+                                  {"400", "proc4", "100", "100", "100", "400000"});
+      
+      writeStatFiles(procfsRootDir, pids, procInfos);
+      
+      // crank up the process tree class.
+      ProcfsBasedProcessTree processTree = 
+          new ProcfsBasedProcessTree("100", procfsRootDir.getAbsolutePath());
+      // build the process tree.
+      processTree.getProcessTree();
+      
+      // verify cumulative memory
+      assertEquals("Cumulative memory does not match", 
+              Long.parseLong("700000"), processTree.getCumulativeVmem());
+      
+      // write one more process as child of 100.
+      String[] newPids = { "500" };
+      setupPidDirs(procfsRootDir, newPids);
+      
+      ProcessStatInfo[] newProcInfos = new ProcessStatInfo[1];
+      newProcInfos[0] = new ProcessStatInfo(new String[]
+                             {"500", "proc5", "100", "100", "100", "500000"});
+      writeStatFiles(procfsRootDir, newPids, newProcInfos);
+      
+      // check vmem includes the new process.
+      processTree.getProcessTree();
+      assertEquals("Cumulative memory does not include new process",
+              Long.parseLong("1200000"), processTree.getCumulativeVmem());
+      
+      // however processes older than 1 iteration will retain the older value
+      assertEquals("Cumulative memory shouldn't have included new process",
+              Long.parseLong("700000"), processTree.getCumulativeVmem(1));
+      
+      // one more process
+      newPids = new String[]{ "600" };
+      setupPidDirs(procfsRootDir, newPids);
+      
+      newProcInfos = new ProcessStatInfo[1];
+      newProcInfos[0] = new ProcessStatInfo(new String[]
+                                     {"600", "proc6", "100", "100", "100", "600000"});
+      writeStatFiles(procfsRootDir, newPids, newProcInfos);
+
+      // refresh process tree
+      processTree.getProcessTree();
+      
+      // processes older than 2 iterations should be same as before.
+      assertEquals("Cumulative memory shouldn't have included new processes",
+          Long.parseLong("700000"), processTree.getCumulativeVmem(2));
+      
+      // processes older than 1 iteration should not include new process,
+      // but include process 500
+      assertEquals("Cumulative memory shouldn't have included new processes",
+          Long.parseLong("1200000"), processTree.getCumulativeVmem(1));
+      
+      // no processes older than 3 iterations, this should be 0
+      assertEquals("Getting non-zero vmem for processes older than 3 iterations",
+                    0L, processTree.getCumulativeVmem(3));
+    } finally {
+      FileUtil.fullyDelete(procfsRootDir);
+    }
+  }
+
+  /**
+   * Create a directory to mimic the procfs file system's root.
+   * @param procfsRootDir root directory to create.
+   * @throws IOException if could not delete the procfs root directory
+   */
+  public static void setupProcfsRootDir(File procfsRootDir) 
+                                        throws IOException { 
+    // cleanup any existing process root dir.
+    if (procfsRootDir.exists()) {
+      assertTrue(FileUtil.fullyDelete(procfsRootDir));  
+    }
+
+    // create afresh
+    assertTrue(procfsRootDir.mkdirs());
+  }
+
+  /**
+   * Create PID directories under the specified procfs root directory
+   * @param procfsRootDir root directory of procfs file system
+   * @param pids the PID directories to create.
+   * @throws IOException If PID dirs could not be created
+   */
+  public static void setupPidDirs(File procfsRootDir, String[] pids) 
+                      throws IOException {
+    for (String pid : pids) {
+      File pidDir = new File(procfsRootDir, pid);
+      pidDir.mkdir();
+      if (!pidDir.exists()) {
+        throw new IOException ("couldn't make process directory under " +
+            "fake procfs");
+      } else {
+        LOG.info("created pid dir");
+      }
+    }
+  }
+  
+  /**
+   * Write stat files under the specified pid directories with data
+   * setup in the corresponding ProcessStatInfo objects
+   * @param procfsRootDir root directory of procfs file system
+   * @param pids the PID directories under which to create the stat file
+   * @param procs corresponding ProcessStatInfo objects whose data should be
+   *              written to the stat files.
+   * @throws IOException if stat files could not be written
+   */
+  public static void writeStatFiles(File procfsRootDir, String[] pids, 
+                              ProcessStatInfo[] procs) throws IOException {
+    for (int i=0; i<pids.length; i++) {
+      File statFile = new File(new File(procfsRootDir, pids[i]), "stat");
+      BufferedWriter bw = null;
+      try {
+        FileWriter fw = new FileWriter(statFile);
+        bw = new BufferedWriter(fw);
+        bw.write(procs[i].getStatLine());
+        LOG.info("wrote stat file for " + pids[i] + 
+                  " with contents: " + procs[i].getStatLine());
+      } finally {
+        // not handling exception - will throw an error and fail the test.
+        if (bw != null) {
+          bw.close();
+        }
+      }
+    }
+  }
 }