You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2008/03/28 01:52:15 UTC

svn commit: r642053 - in /hadoop/core/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/JobConf.java src/java/org/apache/hadoop/util/StringUtils.java src/test/org/apache/hadoop/conf/TestJobConf.java src/test/org/apache/hadoop/util/TestStringUtils.java

Author: shv
Date: Thu Mar 27 17:52:11 2008
New Revision: 642053

URL: http://svn.apache.org/viewvc?rev=642053&view=rev
Log:
HADOOP-3064. Commas in a file path should not be treated as delimiters. Contributed by Hairong Kuang.

Added:
    hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java
Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
    hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java
    hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Mar 27 17:52:11 2008
@@ -395,6 +395,9 @@
     HADOOP-3065. Better logging message if the rack location of a datanode
     cannot be determined.  (Devaraj Das via dhruba)
 
+    HADOOP-3064. Commas in a file path should not be treated as delimiters.
+    (Hairong Kuang via shv)
+
 Release 0.16.2 - Unreleased
 
   BUG FIXES

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java Thu Mar 27 17:52:11 2008
@@ -45,6 +45,7 @@
 import org.apache.hadoop.mapred.lib.IdentityReducer;
 import org.apache.hadoop.mapred.lib.HashPartitioner;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Tool;
 
 /** 
@@ -229,7 +230,7 @@
    */
   public void setInputPath(Path dir) {
     dir = new Path(getWorkingDirectory(), dir);
-    set("mapred.input.dir", dir.toString());
+    set("mapred.input.dir", StringUtils.escapeString(dir.toString()));
   }
 
   /**
@@ -240,8 +241,10 @@
    */
   public void addInputPath(Path dir) {
     dir = new Path(getWorkingDirectory(), dir);
+    String dirStr = StringUtils.escapeString(dir.toString());
     String dirs = get("mapred.input.dir");
-    set("mapred.input.dir", dirs == null ? dir.toString() : dirs + "," + dir);
+    set("mapred.input.dir", dirs == null ? dirStr :
+      dirs + StringUtils.COMMA_STR + dirStr);
   }
 
   /**
@@ -251,10 +254,10 @@
    */
   public Path[] getInputPaths() {
     String dirs = get("mapred.input.dir", "");
-    ArrayList list = Collections.list(new StringTokenizer(dirs, ","));
-    Path[] result = new Path[list.size()];
-    for (int i = 0; i < list.size(); i++) {
-      result[i] = new Path((String)list.get(i));
+    String [] list = StringUtils.split(dirs);
+    Path[] result = new Path[list.length];
+    for (int i = 0; i < list.length; i++) {
+      result[i] = new Path(StringUtils.unEscapeString(list[i]));
     }
     return result;
   }

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java Thu Mar 27 17:52:11 2008
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.util;
 
+import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.net.InetAddress;
@@ -276,6 +277,142 @@
     return values.toArray(new String[values.size()]);
   }
 
+  final public static char COMMA = ',';
+  final public static String COMMA_STR = ",";
+  final public static char ESCAPE_CHAR = '\\';
+  
+  /**
+   * Split a string using the default separator
+   * @param str a string that may have escaped separator
+   * @return an array of strings
+   */
+  public static String[] split(String str) {
+    return split(str, ESCAPE_CHAR, COMMA);
+  }
+  
+  /**
+   * Split a string using the given separator
+   * @param str a string that may have escaped separator
+   * @param escapeChar a char that be used to escape the separator
+   * @param separator a separator char
+   * @return an array of strings
+   */
+  public static String[] split(
+      String str, char escapeChar, char separator) {
+    if (str==null) {
+      return null;
+    }
+    ArrayList<String> strList = new ArrayList<String>();
+    StringBuilder split = new StringBuilder();
+    int numPreEscapes = 0;
+    for (int i=0; i<str.length(); i++) {
+      char curChar = str.charAt(i);
+      if (numPreEscapes==0 && curChar == separator) { // separator 
+        strList.add(split.toString());
+        split.setLength(0); // clear the split
+      } else {
+        split.append(curChar);
+        numPreEscapes = (curChar == escapeChar)?(++numPreEscapes)%2:0;
+      }
+    }
+    strList.add(split.toString());
+    // remove trailing empty split(s)
+    int last = strList.size(); // last split
+    while (--last>=0 && "".equals(strList.get(last))) {
+      strList.remove(last);
+    }
+    return strList.toArray(new String[strList.size()]);
+  }
+  
+  /**
+   * Escape commas in the string using the default escape char
+   * @param str a string
+   * @return an escaped string
+   */
+  public static String escapeString(String str) {
+    return escapeString(str, ESCAPE_CHAR, COMMA);
+  }
+  
+  /**
+   * Escape <code>charToEscape</code> in the string 
+   * with the escape char <code>escapeChar</code>
+   * 
+   * @param str string
+   * @param escapeChar escape char
+   * @param charToEscape the char to be escaped
+   * @return an escaped string
+   */
+  public static String escapeString(
+      String str, char escapeChar, char charToEscape) {
+    if (str == null) {
+      return null;
+    }
+    StringBuilder result = new StringBuilder();
+    for (int i=0; i<str.length(); i++) {
+      char curChar = str.charAt(i);
+      if (curChar == escapeChar || curChar == charToEscape) {
+        // special char
+        result.append(escapeChar);
+      }
+      result.append(curChar);
+    }
+    return result.toString();
+  }
+  
+  /**
+   * Unescape commas in the string using the default escape char
+   * @param str a string
+   * @return an unescaped string
+   */
+  public static String unEscapeString(String str) {
+    return unEscapeString(str, ESCAPE_CHAR, COMMA);
+  }
+  
+  /**
+   * Unescape <code>charToEscape</code> in the string 
+   * with the escape char <code>escapeChar</code>
+   * 
+   * @param str string
+   * @param escapeChar escape char
+   * @param charToEscape the escaped char
+   * @return an unescaped string
+   */
+  public static String unEscapeString(
+      String str, char escapeChar, char charToEscape) {
+    if (str == null) {
+      return null;
+    }
+    StringBuilder result = new StringBuilder(str.length());
+    boolean hasPreEscape = false;
+    for (int i=0; i<str.length(); i++) {
+      char curChar = str.charAt(i);
+      if (hasPreEscape) {
+        if (curChar != escapeChar && curChar != charToEscape) {
+          // no special char
+          throw new IllegalArgumentException("Illegal escaped string " + str + 
+              " unescaped " + escapeChar + " at " + (i-1));
+        } 
+        // otherwise discard the escape char
+        result.append(curChar);
+        hasPreEscape = false;
+      } else {
+        if (curChar == charToEscape) {
+          throw new IllegalArgumentException("Illegal escaped string " + str + 
+              " unescaped " + charToEscape + " at " + i);
+        } else if (curChar == escapeChar) {
+          hasPreEscape = true;
+        } else {
+          result.append(curChar);
+        }
+      }
+    }
+    if (hasPreEscape ) {
+      throw new IllegalArgumentException("Illegal escaped string " + str + 
+          ", not expecting " + charToEscape + " in the end." );
+    }
+    return result.toString();
+  }
+  
   /**
    * Return hostname without throwing exception.
    * @return hostname

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java Thu Mar 27 17:52:11 2008
@@ -19,7 +19,10 @@
 
 import junit.framework.Assert;
 import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.StringUtils;
 
 public class TestJobConf extends TestCase {
 
@@ -49,5 +52,39 @@
     Assert.assertEquals("test", configuration.getProfileParams());
   }
 
-
+  public void testInputPath() throws Exception {
+    JobConf jobConf = new JobConf();
+    Path path = new Path(jobConf.getWorkingDirectory(), 
+        "xx{y"+StringUtils.COMMA_STR+"z}");
+    jobConf.setInputPath(path);
+    Path[] paths = jobConf.getInputPaths();
+    assertEquals(1, paths.length);
+    assertEquals(path.toString(), paths[0].toString());
+    
+    StringBuilder pathStr = new StringBuilder();
+    pathStr.append(StringUtils.ESCAPE_CHAR);
+    pathStr.append(StringUtils.ESCAPE_CHAR);
+    pathStr.append(StringUtils.COMMA);
+    pathStr.append(StringUtils.COMMA);
+    pathStr.append('a');
+    path = new Path(jobConf.getWorkingDirectory(), pathStr.toString());
+    jobConf.setInputPath(path);
+    paths = jobConf.getInputPaths();
+    assertEquals(1, paths.length);
+    assertEquals(path.toString(), paths[0].toString());
+    
+    pathStr.setLength(0);
+    pathStr.append(StringUtils.ESCAPE_CHAR);
+    pathStr.append("xx");
+    pathStr.append(StringUtils.ESCAPE_CHAR);
+    path = new Path(jobConf.getWorkingDirectory(), pathStr.toString());
+    Path path1 = new Path(jobConf.getWorkingDirectory(),
+        "yy"+StringUtils.COMMA_STR+"zz");
+    jobConf.setInputPath(path);
+    jobConf.addInputPath(path1);
+    paths = jobConf.getInputPaths();
+    assertEquals(2, paths.length);
+    assertEquals(path.toString(), paths[0].toString());
+    assertEquals(path1.toString(), paths[1].toString());
+  }
 }

Added: hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java?rev=642053&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java Thu Mar 27 17:52:11 2008
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import junit.framework.TestCase;
+
+public class TestStringUtils extends TestCase {
+  final private static String NULL_STR = null;
+  final private static String EMPTY_STR = "";
+  final private static String STR_WO_SPECIAL_CHARS = "AB";
+  final private static String STR_WITH_COMMA = "A,B";
+  final private static String ESCAPED_STR_WITH_COMMA = "A\\,B";
+  final private static String STR_WITH_ESCAPE = "AB\\";
+  final private static String ESCAPED_STR_WITH_ESCAPE = "AB\\\\";
+  final private static String STR_WITH_BOTH2 = ",A\\,,B\\\\,";
+  final private static String ESCAPED_STR_WITH_BOTH2 = 
+    "\\,A\\\\\\,\\,B\\\\\\\\\\,";
+  
+  public void testEscapeString() throws Exception {
+    assertEquals(NULL_STR, StringUtils.escapeString(NULL_STR));
+    assertEquals(EMPTY_STR, StringUtils.escapeString(EMPTY_STR));
+    assertEquals(STR_WO_SPECIAL_CHARS,
+        StringUtils.escapeString(STR_WO_SPECIAL_CHARS));
+    assertEquals(ESCAPED_STR_WITH_COMMA,
+        StringUtils.escapeString(STR_WITH_COMMA));
+    assertEquals(ESCAPED_STR_WITH_ESCAPE,
+        StringUtils.escapeString(STR_WITH_ESCAPE));
+    assertEquals(ESCAPED_STR_WITH_BOTH2, 
+        StringUtils.escapeString(STR_WITH_BOTH2));
+  }
+  
+  public void testSplit() throws Exception {
+    assertEquals(NULL_STR, StringUtils.split(NULL_STR));
+    String[] splits = StringUtils.split(EMPTY_STR);
+    assertEquals(0, splits.length);
+    splits = StringUtils.split(",,");
+    assertEquals(0, splits.length);
+    splits = StringUtils.split(STR_WO_SPECIAL_CHARS);
+    assertEquals(1, splits.length);
+    assertEquals(STR_WO_SPECIAL_CHARS, splits[0]);
+    splits = StringUtils.split(STR_WITH_COMMA);
+    assertEquals(2, splits.length);
+    assertEquals("A", splits[0]);
+    assertEquals("B", splits[1]);
+    splits = StringUtils.split(ESCAPED_STR_WITH_COMMA);
+    assertEquals(1, splits.length);
+    assertEquals(ESCAPED_STR_WITH_COMMA, splits[0]);
+    splits = StringUtils.split(STR_WITH_ESCAPE);
+    assertEquals(1, splits.length);
+    assertEquals(STR_WITH_ESCAPE, splits[0]);
+    splits = StringUtils.split(STR_WITH_BOTH2);
+    assertEquals(3, splits.length);
+    assertEquals(EMPTY_STR, splits[0]);
+    assertEquals("A\\,", splits[1]);
+    assertEquals("B\\\\", splits[2]);
+    splits = StringUtils.split(ESCAPED_STR_WITH_BOTH2);
+    assertEquals(1, splits.length);
+    assertEquals(ESCAPED_STR_WITH_BOTH2, splits[0]);    
+  }
+  
+  public void testUnescapeString() throws Exception {
+    assertEquals(NULL_STR, StringUtils.unEscapeString(NULL_STR));
+    assertEquals(EMPTY_STR, StringUtils.unEscapeString(EMPTY_STR));
+    assertEquals(STR_WO_SPECIAL_CHARS,
+        StringUtils.unEscapeString(STR_WO_SPECIAL_CHARS));
+    try {
+      StringUtils.unEscapeString(STR_WITH_COMMA);
+      fail("Should throw IllegalArgumentException");
+    } catch (IllegalArgumentException e) {
+      // expected
+    }
+    assertEquals(STR_WITH_COMMA,
+        StringUtils.unEscapeString(ESCAPED_STR_WITH_COMMA));
+    try {
+      StringUtils.unEscapeString(STR_WITH_ESCAPE);
+      fail("Should throw IllegalArgumentException");
+    } catch (IllegalArgumentException e) {
+      // expected
+    }
+    assertEquals(STR_WITH_ESCAPE,
+        StringUtils.unEscapeString(ESCAPED_STR_WITH_ESCAPE));
+    try {
+      StringUtils.unEscapeString(STR_WITH_BOTH2);
+      fail("Should throw IllegalArgumentException");
+    } catch (IllegalArgumentException e) {
+      // expected
+    }
+    assertEquals(STR_WITH_BOTH2,
+        StringUtils.unEscapeString(ESCAPED_STR_WITH_BOTH2));
+  }
+}
\ No newline at end of file