You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sh...@apache.org on 2010/06/02 09:45:44 UTC
svn commit: r950400 - in /hadoop/mapreduce/trunk: ./
src/contrib/streaming/src/java/org/apache/hadoop/streaming/
src/contrib/streaming/src/test/org/apache/hadoop/streaming/
src/docs/src/documentation/content/xdocs/
Author: sharad
Date: Wed Jun 2 07:45:44 2010
New Revision: 950400
URL: http://svn.apache.org/viewvc?rev=950400&view=rev
Log:
MAPREDUCE-1785. Add streaming config option for not emitting the key. Contributed by Eli Collins.
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java
hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java
hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Wed Jun 2 07:45:44 2010
@@ -32,6 +32,9 @@ Trunk (unreleased changes)
MAPREDUCE-1773. streaming doesn't support jobclient.output.filter.
(Amareshwari Sriramadasu via vinodkv)
+ MAPREDUCE-1785. Add streaming config option for not emitting the key.
+ (Eli Collins via sharad)
+
OPTIMIZATIONS
MAPREDUCE-1354. Enhancements to JobTracker for better performance and
Modified: hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java Wed Jun 2 07:45:44 2010
@@ -72,7 +72,8 @@ public class PipeMapper extends PipeMapR
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
- ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());
+ ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
+ inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
Modified: hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java Wed Jun 2 07:45:44 2010
@@ -43,11 +43,17 @@ public class TestStreamingKeyValue
"roses are \tred\t\n\tviolets are blue\nbunnies are pink\n" +
"this is for testing a big\tinput line\n" +
"small input\n";
- protected String outputExpect =
+ protected String outputWithoutKey =
"\tviolets are blue\nbunnies are pink\t\n" +
"roses are \tred\t\n" +
"small input\t\n" +
"this is for testing a big\tinput line\n";
+ protected String outputWithKey =
+ "0\troses are \tred\t\n" +
+ "16\t\tviolets are blue\n" +
+ "34\tbunnies are pink\n" +
+ "51\tthis is for testing a big\tinput line\n" +
+ "88\tsmall input\n";
private StreamJob job;
@@ -66,19 +72,20 @@ public class TestStreamingKeyValue
out.close();
}
- protected String[] genArgs() {
+ protected String[] genArgs(boolean ignoreKey) {
return new String[] {
"-input", INPUT_FILE.getAbsolutePath(),
"-output", OUTPUT_DIR.getAbsolutePath(),
"-mapper", "cat",
"-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
"-jobconf", "stream.non.zero.exit.is.failure=true",
- "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
+ "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"),
+ "-jobconf", "stream.map.input.ignoreKey="+ignoreKey,
};
}
- @Test
- public void testCommandLine() throws Exception
+ public void runStreamJob(final String outputExpect, boolean ignoreKey)
+ throws Exception
{
String outFileName = "part-00000";
File outFile = null;
@@ -93,7 +100,7 @@ public class TestStreamingKeyValue
// During tests, the default Configuration will use a local mapred
// So don't specify -config or -cluster
- job = new StreamJob(genArgs(), mayExit);
+ job = new StreamJob(genArgs(ignoreKey), mayExit);
job.go();
outFile = new File(OUTPUT_DIR, outFileName).getAbsoluteFile();
String output = StreamUtil.slurp(outFile);
@@ -106,9 +113,27 @@ public class TestStreamingKeyValue
}
}
- public static void main(String[]args) throws Exception
+ /**
+ * Run the job with the option indicating that the input format key should be emitted.
+ */
+ @Test
+ public void testCommandLineWithKey() throws Exception
{
- new TestStreamingKeyValue().testCommandLine();
+ runStreamJob(outputWithKey, false);
}
+ /**
+ * Run the job the default way (the input format key is not emitted).
+ */
+ @Test
+ public void testCommandLineWithoutKey() throws Exception
+ {
+ runStreamJob(outputWithoutKey, true);
+ }
+
+ public static void main(String[]args) throws Exception
+ {
+ new TestStreamingKeyValue().testCommandLineWithKey();
+ new TestStreamingKeyValue().testCommandLineWithoutKey();
+ }
}
Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml?rev=950400&r1=950399&r2=950400&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/streaming.xml Wed Jun 2 07:45:44 2010
@@ -310,6 +310,7 @@ the nth field separator in a line of the
</p>
<p> Similarly, you can specify "stream.map.input.field.separator" and "stream.reduce.input.field.separator" as the input separator for MapReduce
inputs. By default the separator is the tab character.</p>
+<p> You can set "stream.map.input.ignoreKey" to true so that the input key is not passed to the mapper when using the text input writer. This is the default behavior when the input format is TextInputFormat; set it to false to pass the key to the mapper as well.</p>
</section>
<section>