You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/02/24 00:36:13 UTC

svn commit: r380272 - in /lucene/hadoop/trunk: conf/hadoop-default.xml src/java/org/apache/hadoop/mapred/TaskRunner.java

Author: cutting
Date: Thu Feb 23 15:36:08 2006
New Revision: 380272

URL: http://svn.apache.org/viewcvs?rev=380272&view=rev
Log:
Fix for HADOOP-41.  Support passing more options to child JVM.  Contributed by Michael Stack.

Modified:
    lucene/hadoop/trunk/conf/hadoop-default.xml
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java

Modified: lucene/hadoop/trunk/conf/hadoop-default.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?rev=380272&r1=380271&r2=380272&view=diff
==============================================================================
--- lucene/hadoop/trunk/conf/hadoop-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Thu Feb 23 15:36:08 2006
@@ -190,10 +190,21 @@
 </property>
 
 <property>
-  <name>mapred.child.heap.size</name>
-  <value>200m</value>
-  <description>The heap size (-Xmx) that will be used for task tracker
-  child processes.</description>
+  <name>mapred.child.java.opts</name>
+  <value>-Xmx200m</value>
+  <description>Java opts for the task tracker child processes.  Subsumes
+  'mapred.child.heap.size' (If a mapred.child.heap.size value is found
+  in a configuration, its maximum heap size will be used and a warning
+  emitted that heap.size has been deprecated). Also, the following symbols,
+  if present, will be interpolated: @taskid@ is replaced by current TaskID;
+  and @port@ will be replaced by mapred.task.tracker.report.port + 1 (A second
+  child will fail with a port-in-use if mapred.tasktracker.tasks.maximum is
+  greater than one). Any other occurrences of '@' will go unchanged. For
+  example, to enable verbose gc logging to a file named for the taskid in
+  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+
+        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+  </description>
 </property>
 
 <property>

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java?rev=380272&r1=380271&r2=380272&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java Thu Feb 23 15:36:08 2006
@@ -21,6 +21,7 @@
 
 import java.io.*;
 import java.util.logging.*;
+import java.util.Vector;
 
 /** Base class that runs a task in a separate process.  Tasks are run in a
  * separate process in order to isolate the map/reduce system code from bugs in
@@ -90,20 +91,56 @@
         classPath.append(workDir);
       }
 
+      //  Build exec child jvm args.
+      Vector vargs = new Vector(8);
       File jvm =                                  // use same jvm as parent
         new File(new File(System.getProperty("java.home"), "bin"), "java");
-        
-      // run java
-      runChild(new String[] {
-        jvm.toString(),
-        //"-Xrunhprof:cpu=samples,file="+t.getTaskId()+".prof",
-        "-Xmx"+job.get("mapred.child.heap.size", "200m"),
-        "-cp", classPath.toString(),
-        TaskTracker.Child.class.getName(),        // main is Child
-        tracker.taskReportPort+"",                // pass umbilical port
-        t.getTaskId()                             // pass task identifier
-      }, workDir);
 
+      vargs.add(jvm.toString());
+
+      // Add child java opts.  Also, mapred.child.heap.size has been superseded
+      // by mapred.child.java.opts.  Manage case where both are present
+      // letting the mapred.child.heap.size win over any setting of heap size in
+      // mapred.child.java.opts (Emit a warning that heap.size is deprecated).
+      //
+      // The following symbols if present in mapred.child.java.opts value are
+      // replaced:
+      // + @taskid@ is interpolated with value of TaskID.
+      // + Replaces @port@ with mapred.task.tracker.report.port + 1.
+      // Other occurrences of @ will not be altered.
+      //
+      // Example with multiple arguments and substitutions, showing
+      // jvm GC logging, and start of a passwordless JVM JMX agent so can
+      // connect with jconsole and the likes to watch child memory, threads
+      // and get thread dumps.
+      //
+      //     <name>mapred.child.java.opts</name>
+      //     <value>-verbose:gc -Xloggc:/tmp/@taskid@.gc \
+      //     -Dcom.sun.management.jmxremote.authenticate=false \
+      //     -Dcom.sun.management.jmxremote.ssl=false \
+      //     -Dcom.sun.management.jmxremote.port=@port@
+      //     </value>
+      //
+      String javaOpts = handleDeprecatedHeapSize(
+          job.get("mapred.child.java.opts", "-Xmx200m"),
+          job.get("mapred.child.heap.size"));
+      javaOpts = replaceAll(javaOpts, "@taskid@", t.getTaskId());
+      int port = job.getInt("mapred.task.tracker.report.port", 50050) + 1;
+      javaOpts = replaceAll(javaOpts, "@port@", Integer.toString(port));
+      String [] javaOptsSplit = javaOpts.split(" ");
+      for (int i = 0; i < javaOptsSplit.length; i++) {
+         vargs.add(javaOptsSplit[i]);
+      }
+
+      // Add classpath.
+      vargs.add("-classpath");
+      vargs.add(classPath.toString());
+      // Add main class and its arguments 
+      vargs.add(TaskTracker.Child.class.getName());  // main of Child
+      vargs.add(tracker.taskReportPort + "");        // pass umbilical port
+      vargs.add(t.getTaskId());                      // pass task identifier
+      // Run java
+      runChild((String[])vargs.toArray(new String[0]), workDir);
     } catch (FSError e) {
       LOG.log(Level.SEVERE, "FSError", e);
       try {
@@ -123,6 +160,65 @@
     } finally {
       tracker.reportTaskFinished(t.getTaskId());
     }
+  }
+
+  /**
+   * Handle deprecated mapred.child.heap.size.
+   * If present, its value replaces the first -Xmx setting found in the
+   * mapred.child.java.opts value (or is appended) and a warning is logged.
+   * @param javaOpts Value of mapred.child.java.opts property.
+   * @param heapSize Value of mapred.child.heap.size property.
+   * @return A <code>javaOpts</code> with <code>heapSize</code>
+   * interpolated if present.
+   */
+  private String handleDeprecatedHeapSize(String javaOpts,
+          final String heapSize) {
+    if (heapSize == null || heapSize.length() <= 0) {
+        return javaOpts;  // Deprecated property not set: nothing to do.
+    }
+    final String MX = "-Xmx";
+    int index = javaOpts.indexOf(MX);
+    if (index < 0) {
+        javaOpts = javaOpts + " " + MX + heapSize;
+    } else {
+        int end = javaOpts.indexOf(" ", index + MX.length());
+        javaOpts = javaOpts.substring(0, index + MX.length()) +
+            heapSize + ((end < 0)? "": javaOpts.substring(end));
+    }
+    LOG.warning("mapred.child.heap.size is deprecated. Use " +
+        "mapred.child.java.opts instead. Meantime, interpolated " +
+        "child.heap.size into child.java.opts: " + javaOpts);
+    return javaOpts;
+  }
+
+  /**
+   * Replace every occurrence of <code>toFind</code> with
+   * <code>replacement</code>, scanning <code>text</code> left to right.
+   * Inserted replacement text is never rescanned for further matches.
+   * When hadoop moves to JDK1.5, replace this method with String#replace
+   * (or, if commons-lang is available, with StringUtils#replace).
+   * @param text String to do replacements in.
+   * @param toFind String to find; if empty, nothing is replaced.
+   * @param replacement String to replace <code>toFind</code> with.
+   * @return <code>text</code> with all instances of <code>toFind</code>
+   * replaced (<code>text</code> itself when nothing was replaced).
+   * @throws IllegalArgumentException if any argument is null.
+   */
+  private static String replaceAll(String text, final String toFind,
+      final String replacement) {
+    if (text ==  null || toFind ==  null || replacement ==  null) {
+      throw new IllegalArgumentException("Text " + text + " or toFind " +
+        toFind + " or replacement " + replacement + " are null.");
+    }
+    StringBuffer result = new StringBuffer(text.length());
+    int copied = 0;  // Chars of the original text consumed so far.
+    int index = (toFind.length() == 0)? -1: text.indexOf(toFind);
+    for (; index >= 0; index = text.indexOf(toFind, copied)) {
+      result.append(text.substring(copied, index)).append(replacement);
+      copied = index + toFind.length();
+    }
+    return (copied == 0)? text:  // No match: hand back the original.
+        result.append(text.substring(copied)).toString();
   }
 
   /**