You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sh...@apache.org on 2010/06/02 09:45:44 UTC

svn commit: r950400 - in /hadoop/mapreduce/trunk: ./ src/contrib/streaming/src/java/org/apache/hadoop/streaming/ src/contrib/streaming/src/test/org/apache/hadoop/streaming/ src/docs/src/documentation/content/xdocs/

Author: sharad
Date: Wed Jun  2 07:45:44 2010
New Revision: 950400

URL: http://svn.apache.org/viewvc?rev=950400&view=rev
Log:
MAPREDUCE-1785. Add streaming config option for not emitting the key. Contributed by Eli Collins.

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java
    hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Wed Jun  2 07:45:44 2010
@@ -32,6 +32,9 @@ Trunk (unreleased changes)
     MAPREDUCE-1773. streaming doesn't support jobclient.output.filter.
     (Amareshwari Sriramadasu via vinodkv)
 
+    MAPREDUCE-1785. Add streaming config option for not emitting the key.
+    (Eli Collins via sharad)
+
   OPTIMIZATIONS
 
     MAPREDUCE-1354. Enhancements to JobTracker for better performance and

Modified: hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java Wed Jun  2 07:45:44 2010
@@ -72,7 +72,8 @@ public class PipeMapper extends PipeMapR
     skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
     if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
       String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
-      ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());
+      ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
+        inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
     }
     
     try {

Modified: hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java Wed Jun  2 07:45:44 2010
@@ -43,11 +43,17 @@ public class TestStreamingKeyValue
     "roses are \tred\t\n\tviolets are blue\nbunnies are pink\n" +
     "this is for testing a big\tinput line\n" +
     "small input\n";
-  protected String outputExpect = 
+  protected String outputWithoutKey = 
     "\tviolets are blue\nbunnies are pink\t\n" + 
     "roses are \tred\t\n" +
     "small input\t\n" +
     "this is for testing a big\tinput line\n";
+  protected String outputWithKey = 
+    "0\troses are \tred\t\n" +  
+    "16\t\tviolets are blue\n" +
+    "34\tbunnies are pink\n" +
+    "51\tthis is for testing a big\tinput line\n" +
+    "88\tsmall input\n";
 
   private StreamJob job;
 
@@ -66,19 +72,20 @@ public class TestStreamingKeyValue
     out.close();
   }
 
-  protected String[] genArgs() {
+  protected String[] genArgs(boolean ignoreKey) {
     return new String[] {
       "-input", INPUT_FILE.getAbsolutePath(),
       "-output", OUTPUT_DIR.getAbsolutePath(),
       "-mapper", "cat",
       "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true", 
       "-jobconf", "stream.non.zero.exit.is.failure=true",
-      "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
+      "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"),
+      "-jobconf", "stream.map.input.ignoreKey="+ignoreKey,      
     };
   }
   
-  @Test
-  public void testCommandLine() throws Exception
+  public void runStreamJob(final String outputExpect, boolean ignoreKey) 
+      throws Exception
   {
     String outFileName = "part-00000";
     File outFile = null;
@@ -93,7 +100,7 @@ public class TestStreamingKeyValue
 
       // During tests, the default Configuration will use a local mapred
       // So don't specify -config or -cluster
-      job = new StreamJob(genArgs(), mayExit);      
+      job = new StreamJob(genArgs(ignoreKey), mayExit);      
       job.go();
       outFile = new File(OUTPUT_DIR, outFileName).getAbsoluteFile();
       String output = StreamUtil.slurp(outFile);
@@ -106,9 +113,27 @@ public class TestStreamingKeyValue
     }
   }
 
-  public static void main(String[]args) throws Exception
+  /**
+   * Run the job with the option indicating the input format key should be emitted.
+   */
+  @Test
+  public void testCommandLineWithKey() throws Exception
   {
-    new TestStreamingKeyValue().testCommandLine();
+    runStreamJob(outputWithKey, false);
   }
 
+  /**
+   * Run the job the default way (the input format key is not emitted).
+   */
+  @Test
+  public void testCommandLineWithoutKey() throws Exception
+  {
+      runStreamJob(outputWithoutKey, true);
+  }
+  
+  public static void main(String[]args) throws Exception
+  {
+    new TestStreamingKeyValue().testCommandLineWithKey();    
+    new TestStreamingKeyValue().testCommandLineWithoutKey();
+  }
 }

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml Wed Jun  2 07:45:44 2010
@@ -310,6 +310,7 @@ the nth field separator in a line of the
 </p>
 <p> Similarly, you can specify "stream.map.input.field.separator" and "stream.reduce.input.field.separator" as the input separator for MapReduce 
 inputs. By default the separator is the tab character.</p>
+<p> You can set "stream.map.input.ignoreKey" to true so that the key is not emitted to the mapper when using text input. This is the default behavior when using TextInputFormat; set it to false to have the key emitted as well.</p>
 </section>
 
 <section>