You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by vi...@apache.org on 2010/06/08 08:33:57 UTC
svn commit: r952548 - in /hadoop/mapreduce/trunk: CHANGES.txt
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java
src/docs/src/documentation/content/xdocs/streaming.xml
Author: vinodkv
Date: Tue Jun 8 06:33:57 2010
New Revision: 952548
URL: http://svn.apache.org/viewvc?rev=952548&view=rev
Log:
MAPREDUCE-1697. Document the behavior of -file option and deprecate it in favour of -files option in streaming. Contributed by Amareshwari Sriramadasu.
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java
hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=952548&r1=952547&r2=952548&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Jun 8 06:33:57 2010
@@ -825,6 +825,10 @@ Release 0.21.0 - Unreleased
MAPREDUCE-1404. Move Cluster-Setup and Single-Node-Setup Docs from
MapReduce to Common. (tomwhite)
+ MAPREDUCE-1697. Document the behavior of -file option and deprecate it
+ in favour of -files option in streaming. (Amareshwari Sriramadasu
+ via vinodkv)
+
OPTIMIZATIONS
MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band
Modified: hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java?rev=952548&r1=952547&r2=952548&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java Tue Jun 8 06:33:57 2010
@@ -270,6 +270,8 @@ public class StreamJob implements Tool {
values = cmdLine.getOptionValues("file");
if (values != null && values.length > 0) {
+ LOG.warn("-file option is deprecated, please use generic option" +
+ " -files instead.");
StringBuilder unpackRegex = new StringBuilder(
config_.getPattern(MRJobConfig.JAR_UNPACK_PATTERN,
JobConf.UNPACK_JAR_PATTERN_DEFAULT).pattern());
@@ -475,7 +477,8 @@ public class StreamJob implements Tool {
System.out.println(" -combiner <cmd|JavaClassName>" +
" The streaming command to run");
System.out.println(" -reducer <cmd|JavaClassName> The streaming command to run");
- System.out.println(" -file <file> File/dir to be shipped in the Job jar file");
+ System.out.println(" -file <file> File/dir to be shipped in the" +
+ " Job jar file.\n Deprecated. Use generic option \"-files\" instead");
System.out.println(" -inputformat TextInputFormat(default)|SequenceFileAsTextInputFormat|JavaClassName Optional.");
System.out.println(" -outputformat TextOutputFormat(default)|JavaClassName Optional.");
System.out.println(" -partitioner JavaClassName Optional.");
@@ -506,7 +509,13 @@ public class StreamJob implements Tool {
System.out.println("Map output format, reduce input/output format:");
System.out.println(" Format defined by what the mapper command outputs. Line-oriented");
System.out.println();
- System.out.println("The files named in the -file argument[s] end up in the");
+ System.out.println("The files with extensions .class and .jar/.zip, ");
+ System.out.println(" specified for the -file argument[s], end up in ");
+ System.out.println(" \"classes\" and \"lib\" directories respectively" +
+ " inside the");
+ System.out.println(" working directory when the mapper and reducer are run.");
+ System.out.println(" All other files specified for the -file argument[s]" +
+ " end up in the");
System.out.println(" working directory when the mapper and reducer are run.");
System.out.println(" The location of this working directory is unspecified.");
System.out.println();
Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml?rev=952548&r1=952547&r2=952548&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml Tue Jun 8 06:33:57 2010
@@ -157,6 +157,22 @@ $HADOOP_HOME/bin/hadoop jar $HADOOP_HOM
-file myPythonScript.py \
-file myDictionary.txt \
</source>
+<p>
If files with extension .class are added using the -file option, they are packaged
into the "classes" directory in the job jar. Similarly, if files with extension .jar
or .zip are added using the -file option, they are packaged into the "lib" directory in
the job jar. When unjarred on the tasktracker, symlinks to the directories "classes"
and "lib" are created from the cwd of the task. All the files in the classes and lib
directories are automatically added to the classpath of the task. If you'd like to
explicitly access these .class or .jar/.zip files inside your mapper/reducer
code, access them as ./lib/jarfile.jar or ./lib/zipfile.zip or
./classes/classFile.class.
+</p>
+<p>
All other files added using the -file option (e.g., .txt or .pl) are packaged
directly in the job jar. When unjarred on the tasktracker, symlinks to these files
are created from the cwd of the task.
+</p>
</section>
<section>