You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/02/24 00:36:13 UTC
svn commit: r380272 - in /lucene/hadoop/trunk: conf/hadoop-default.xml
src/java/org/apache/hadoop/mapred/TaskRunner.java
Author: cutting
Date: Thu Feb 23 15:36:08 2006
New Revision: 380272
URL: http://svn.apache.org/viewcvs?rev=380272&view=rev
Log:
Fix for HADOOP-41. Support passing more options to child JVM. Contributed by Michael Stack.
Modified:
lucene/hadoop/trunk/conf/hadoop-default.xml
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
Modified: lucene/hadoop/trunk/conf/hadoop-default.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?rev=380272&r1=380271&r2=380272&view=diff
==============================================================================
--- lucene/hadoop/trunk/conf/hadoop-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Thu Feb 23 15:36:08 2006
@@ -190,10 +190,21 @@
</property>
<property>
- <name>mapred.child.heap.size</name>
- <value>200m</value>
- <description>The heap size (-Xmx) that will be used for task tracker
- child processes.</description>
+ <name>mapred.child.java.opts</name>
+ <value>-Xmx200m</value>
+ <description>Java opts for the task tracker child processes. Subsumes
+ 'mapred.child.heap.size' (If a mapred.child.heap.size value is found
+ in a configuration, its maximum heap size will be used and a warning
+ emitted that heap.size has been deprecated). Also, the following symbols,
+ if present, will be interpolated: @taskid@ is replaced by current TaskID;
+ and @port@ will be replaced by mapred.task.tracker.report.port + 1 (A second
+ child will fail with a port-in-use if mapred.tasktracker.tasks.maximum is
+ greater than one). Any other occurrences of '@' will go unchanged. For
+ example, to enable verbose gc logging to a file named for the taskid in
+ /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+
+ -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+ </description>
</property>
<property>
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java?rev=380272&r1=380271&r2=380272&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java Thu Feb 23 15:36:08 2006
@@ -21,6 +21,7 @@
import java.io.*;
import java.util.logging.*;
+import java.util.Vector;
/** Base class that runs a task in a separate process. Tasks are run in a
* separate process in order to isolate the map/reduce system code from bugs in
@@ -90,20 +91,56 @@
classPath.append(workDir);
}
+ // Build exec child jvm args.
+ Vector vargs = new Vector(8);
File jvm = // use same jvm as parent
new File(new File(System.getProperty("java.home"), "bin"), "java");
-
- // run java
- runChild(new String[] {
- jvm.toString(),
- //"-Xrunhprof:cpu=samples,file="+t.getTaskId()+".prof",
- "-Xmx"+job.get("mapred.child.heap.size", "200m"),
- "-cp", classPath.toString(),
- TaskTracker.Child.class.getName(), // main is Child
- tracker.taskReportPort+"", // pass umbilical port
- t.getTaskId() // pass task identifier
- }, workDir);
+ vargs.add(jvm.toString());
+
+ // Add child java opts. Also, mapred.child.heap.size has been superseded
+ // by mapred.child.java.opts. Manage case where both are present
+ // letting the mapred.child.heap.size win over any setting of heap size in
+ // mapred.child.java.opts (Emit a warning that heap.size is deprecated).
+ //
+ // The following symbols if present in mapred.child.java.opts value are
+ // replaced:
+ // + @taskid@ is interpolated with value of TaskID.
+ // + Replaces @port@ with mapred.task.tracker.report.port + 1.
+ // Other occurrences of @ will not be altered.
+ //
+ // Example with multiple arguments and substitutions, showing
+ // jvm GC logging, and start of a passwordless JVM JMX agent so can
+ // connect with jconsole and the likes to watch child memory, threads
+ // and get thread dumps.
+ //
+ // <name>mapred.child.java.opts</name>
+ // <value>-verbose:gc -Xloggc:/tmp/@taskid@.gc \
+ // -Dcom.sun.management.jmxremote.authenticate=false \
+ // -Dcom.sun.management.jmxremote.ssl=false \
+ // -Dcom.sun.management.jmxremote.port=@port@
+ // </value>
+ //
+ String javaOpts = handleDeprecatedHeapSize(
+ job.get("mapred.child.java.opts", "-Xmx200m"),
+ job.get("mapred.child.heap.size"));
+ javaOpts = replaceAll(javaOpts, "@taskid@", t.getTaskId());
+ int port = job.getInt("mapred.task.tracker.report.port", 50050) + 1;
+ javaOpts = replaceAll(javaOpts, "@port@", Integer.toString(port));
+ String [] javaOptsSplit = javaOpts.split(" ");
+ for (int i = 0; i < javaOptsSplit.length; i++) {
+ vargs.add(javaOptsSplit[i]);
+ }
+
+ // Add classpath.
+ vargs.add("-classpath");
+ vargs.add(classPath.toString());
+ // Add main class and its arguments
+ vargs.add(TaskTracker.Child.class.getName()); // main of Child
+ vargs.add(tracker.taskReportPort + ""); // pass umbilical port
+ vargs.add(t.getTaskId()); // pass task identifier
+ // Run java
+ runChild((String[])vargs.toArray(new String[0]), workDir);
} catch (FSError e) {
LOG.log(Level.SEVERE, "FSError", e);
try {
@@ -123,6 +160,65 @@
} finally {
tracker.reportTaskFinished(t.getTaskId());
}
+ }
+
+  /**
+   * Handle the deprecated mapred.child.heap.size property.
+   * If a heap size is supplied, it is interpolated into the
+   * mapred.child.java.opts value -- overwriting the value of the first
+   * -Xmx option found there, or appended as a new -Xmx option when there
+   * is none -- and a deprecation warning is logged.
+   * @param javaOpts Value of mapred.child.java.opts property.
+   * @param heapSize Value of mapred.child.heap.size property. May be null
+   * or empty, in which case <code>javaOpts</code> is returned unchanged and
+   * no warning is logged.
+   * @return A <code>javaOpts</code> with <code>heapSize</code>
+   * interpolated if present.
+   */
+  private String handleDeprecatedHeapSize(String javaOpts,
+      final String heapSize) {
+    if (heapSize == null || heapSize.length() <= 0) {
+      return javaOpts;
+    }
+    final String MX = "-Xmx";
+    int index = javaOpts.indexOf(MX);
+    if (index < 0) {
+      // No -Xmx present: append one. Skip the separator when javaOpts is
+      // empty; the options string is later split on single spaces, so a
+      // leading space would produce an empty child JVM argument.
+      javaOpts = (javaOpts.length() == 0)? MX + heapSize:
+        javaOpts + " " + MX + heapSize;
+    } else {
+      // Swap heapSize in for whatever followed -Xmx, up to the next space
+      // (or up to the end of the string when -Xmx is the last option).
+      int end = javaOpts.indexOf(" ", index + MX.length());
+      javaOpts = javaOpts.substring(0, index + MX.length()) +
+        heapSize + ((end < 0)? "": javaOpts.substring(end));
+    }
+    // Point users at the replacement property, not the deprecated one.
+    LOG.warning("mapred.child.heap.size is deprecated. Use " +
+      "mapred.child.java.opts instead. Meantime, interpolated " +
+      "child.heap.size into child.java.opts: " + javaOpts);
+    return javaOpts;
+  }
+
+  /**
+   * Replace every occurrence of <code>toFind</code> in <code>text</code>
+   * with <code>replacement</code>.
+   * Matching is literal and proceeds left to right; text that has been
+   * inserted as a replacement is never rescanned, so a
+   * <code>replacement</code> that itself contains <code>toFind</code> is
+   * safe.
+   * When hadoop moves to JDK1.5, replace this method with
+   * String#replace (Or, if commons-lang is available, replace with
+   * StringUtils#replace).
+   * @param text String to do replacements in.
+   * @param toFind String to find. An empty string matches nothing.
+   * @param replacement String to replace <code>toFind</code> with.
+   * @return A String with all instances of <code>toFind</code>
+   * replaced by <code>replacement</code> (The original
+   * <code>text</code> is returned if <code>toFind</code> is empty or is
+   * not found in <code>text</code>).
+   * @throws IllegalArgumentException If any argument is null.
+   */
+  private static String replaceAll(String text, final String toFind,
+      final String replacement) {
+    if (text == null || toFind == null || replacement == null) {
+      throw new IllegalArgumentException("Text " + text + " or toFind " +
+        toFind + " or replacement " + replacement + " are null.");
+    }
+    // An empty pattern matches at every position and would never advance.
+    if (toFind.length() == 0) {
+      return text;
+    }
+    int index = text.indexOf(toFind);
+    if (index < 0) {
+      return text;
+    }
+    // Build the result separately and keep searching the untouched input,
+    // so offsets stay valid whatever the length of the replacement.
+    // StringBuffer (not StringBuilder) keeps this JDK1.4 compatible.
+    StringBuffer result = new StringBuffer(text.length());
+    int offset = 0;
+    while (index >= 0) {
+      result.append(text.substring(offset, index)).append(replacement);
+      offset = index + toFind.length();
+      index = text.indexOf(toFind, offset);
+    }
+    result.append(text.substring(offset));
+    return result.toString();
   }
/**