You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by vi...@apache.org on 2010/06/08 08:33:57 UTC

svn commit: r952548 - in /hadoop/mapreduce/trunk: CHANGES.txt src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java src/docs/src/documentation/content/xdocs/streaming.xml

Author: vinodkv
Date: Tue Jun  8 06:33:57 2010
New Revision: 952548

URL: http://svn.apache.org/viewvc?rev=952548&view=rev
Log:
MAPREDUCE-1697. Document the behavior of -file option and deprecate it in favour of -files option in streaming. Contributed by Amareshwari Sriramadasu.

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=952548&r1=952547&r2=952548&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Jun  8 06:33:57 2010
@@ -825,6 +825,10 @@ Release 0.21.0 - Unreleased
     MAPREDUCE-1404.  Move Cluster-Setup and Single-Node-Setup Docs from
     MapReduce to Common.  (tomwhite)
 
+    MAPREDUCE-1697. Document the behavior of -file option and deprecate it
+    in favour of -files option in streaming. (Amareshwari Sriramadasu
+    via vinodkv)
+
   OPTIMIZATIONS
 
     MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band

Modified: hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java?rev=952548&r1=952547&r2=952548&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java Tue Jun  8 06:33:57 2010
@@ -270,6 +270,8 @@ public class StreamJob implements Tool {
       
       values = cmdLine.getOptionValues("file");
       if (values != null && values.length > 0) {
+        LOG.warn("-file option is deprecated, please use generic option" +
+        		" -files instead.");
         StringBuilder unpackRegex = new StringBuilder(
           config_.getPattern(MRJobConfig.JAR_UNPACK_PATTERN,
                              JobConf.UNPACK_JAR_PATTERN_DEFAULT).pattern());
@@ -475,7 +477,8 @@ public class StreamJob implements Tool {
     System.out.println("  -combiner <cmd|JavaClassName>" + 
                        " The streaming command to run");
     System.out.println("  -reducer  <cmd|JavaClassName>      The streaming command to run");
-    System.out.println("  -file     <file>     File/dir to be shipped in the Job jar file");
+    System.out.println("  -file     <file>     File/dir to be shipped in the" +
+    		" Job jar file.\n Deprecated. Use generic option \"-files\" instead");
     System.out.println("  -inputformat TextInputFormat(default)|SequenceFileAsTextInputFormat|JavaClassName Optional.");
     System.out.println("  -outputformat TextOutputFormat(default)|JavaClassName  Optional.");
     System.out.println("  -partitioner JavaClassName  Optional.");
@@ -506,7 +509,13 @@ public class StreamJob implements Tool {
     System.out.println("Map output format, reduce input/output format:");
     System.out.println("  Format defined by what the mapper command outputs. Line-oriented");
     System.out.println();
-    System.out.println("The files named in the -file argument[s] end up in the");
+    System.out.println("The files with extensions .class and .jar/.zip, ");
+    System.out.println("  specified for the -file argument[s], end up in ");
+    System.out.println("  \"classes\" and \"lib\" directories respectively" +
+    		" inside the");
+    System.out.println("  working directory when the mapper and reducer are run.");
+    System.out.println("  All other files specified for the -file argument[s]" +
+    		" end up in the");
     System.out.println("  working directory when the mapper and reducer are run.");
     System.out.println("  The location of this working directory is unspecified.");
     System.out.println();

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml?rev=952548&r1=952547&r2=952548&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml Tue Jun  8 06:33:57 2010
@@ -157,6 +157,22 @@ $HADOOP_HOME/bin/hadoop  jar $HADOOP_HOM
     -file myPythonScript.py \
     -file myDictionary.txt \
 </source>
+<p>
+If files with extension .class are added using -file option, they are packaged
+into "classes" directory in the job jar. Similarly, if files with extension .jar
+or .zip are added using -file option they are packaged into "lib" directory in
+the job jar. When unjarred on tasktracker, symlinks to the directories "classes"
+and "lib" are created from the cwd of the task. All the files in classes and lib
+directories are automatically added to classpath of the task. If you'd like to 
+explicitly access these .class or .jar/.zip files inside your mapper/reducer
+code, access them as ./lib/jarfile.jar or ./lib/zipfile.zip or 
+./classes/classFile.class.
+</p>
+<p>
+All other files added using -file option (e.g., .txt or .pl) are packaged
+directly in the job jar. When unjarred on tasktracker, symlinks to these files
+are created from the cwd of the task.
+</p>
 </section>
 
 <section>