You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ac...@apache.org on 2011/09/21 03:10:50 UTC

svn commit: r1173451 - in /hadoop/common/trunk/hadoop-mapreduce-project: CHANGES.txt src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java

Author: acmurthy
Date: Wed Sep 21 01:10:49 2011
New Revision: 1173451

URL: http://svn.apache.org/viewvc?rev=1173451&view=rev
Log:
MAPREDUCE-3018. Fixed -file option for streaming. Contributed by Mahadev Konar.

Modified:
    hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java

Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1173451&r1=1173450&r2=1173451&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Wed Sep 21 01:10:49 2011
@@ -1370,6 +1370,8 @@ Release 0.23.0 - Unreleased
     YarnClientProtocolProvider and ensured MiniMRYarnCluster sets JobHistory
     configuration for tests. (acmurthy) 
 
+    MAPREDUCE-3018. Fixed -file option for streaming. (mahadev via acmurthy) 
+
 Release 0.22.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java?rev=1173451&r1=1173450&r2=1173451&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java Wed Sep 21 01:10:49 2011
@@ -22,8 +22,10 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URLEncoder;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
@@ -43,6 +45,7 @@ import org.apache.hadoop.mapreduce.MRCon
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.filecache.DistributedCache;
 import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -277,19 +280,25 @@ public class StreamJob implements Tool {
       if (values != null && values.length > 0) {
         LOG.warn("-file option is deprecated, please use generic option" +
         		" -files instead.");
-        StringBuilder unpackRegex = new StringBuilder(
-          config_.getPattern(MRJobConfig.JAR_UNPACK_PATTERN,
-                             JobConf.UNPACK_JAR_PATTERN_DEFAULT).pattern());
+
+        String fileList = null;
         for (String file : values) {
           packageFiles_.add(file);
-          String fname = new File(file).getName();
-          unpackRegex.append("|(?:").append(Pattern.quote(fname)).append(")");
+          try {
+            URI pathURI = new URI(file);
+            Path path = new Path(pathURI);
+            FileSystem localFs = FileSystem.getLocal(config_);
+            String finalPath = path.makeQualified(localFs).toString();
+            fileList = fileList == null ? finalPath : fileList + "," + finalPath;
+          } catch (Exception e) {
+            throw new IllegalArgumentException(e);
+          }
         }
-        config_.setPattern(MRJobConfig.JAR_UNPACK_PATTERN,
-                           Pattern.compile(unpackRegex.toString()));
+        config_.set("tmpfiles", config_.get("tmpfiles", "") +
+                                  (fileList == null ? "" : fileList));
         validate(packageFiles_);
       }
-         
+
       String fsName = cmdLine.getOptionValue("dfs");
       if (null != fsName){
         LOG.warn("-dfs option is deprecated, please use -fs instead.");