You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2008/03/28 01:52:15 UTC
svn commit: r642053 - in /hadoop/core/trunk: CHANGES.txt
src/java/org/apache/hadoop/mapred/JobConf.java
src/java/org/apache/hadoop/util/StringUtils.java
src/test/org/apache/hadoop/conf/TestJobConf.java
src/test/org/apache/hadoop/util/TestStringUtils.java
Author: shv
Date: Thu Mar 27 17:52:11 2008
New Revision: 642053
URL: http://svn.apache.org/viewvc?rev=642053&view=rev
Log:
HADOOP-3064. Commas in a file path should not be treated as delimiters. Contributed by Hairong Kuang.
Added:
hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java
hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Mar 27 17:52:11 2008
@@ -395,6 +395,9 @@
HADOOP-3065. Better logging message if the rack location of a datanode
cannot be determined. (Devaraj Das via dhruba)
+ HADOOP-3064. Commas in a file path should not be treated as delimiters.
+ (Hairong Kuang via shv)
+
Release 0.16.2 - Unreleased
BUG FIXES
Modified: hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/mapred/JobConf.java Thu Mar 27 17:52:11 2008
@@ -45,6 +45,7 @@
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
/**
@@ -229,7 +230,7 @@
*/
public void setInputPath(Path dir) {
dir = new Path(getWorkingDirectory(), dir);
- set("mapred.input.dir", dir.toString());
+ set("mapred.input.dir", StringUtils.escapeString(dir.toString()));
}
/**
@@ -240,8 +241,10 @@
*/
public void addInputPath(Path dir) {
dir = new Path(getWorkingDirectory(), dir);
+ String dirStr = StringUtils.escapeString(dir.toString());
String dirs = get("mapred.input.dir");
- set("mapred.input.dir", dirs == null ? dir.toString() : dirs + "," + dir);
+ set("mapred.input.dir", dirs == null ? dirStr :
+ dirs + StringUtils.COMMA_STR + dirStr);
}
/**
@@ -251,10 +254,10 @@
*/
public Path[] getInputPaths() {
String dirs = get("mapred.input.dir", "");
- ArrayList list = Collections.list(new StringTokenizer(dirs, ","));
- Path[] result = new Path[list.size()];
- for (int i = 0; i < list.size(); i++) {
- result[i] = new Path((String)list.get(i));
+ String [] list = StringUtils.split(dirs);
+ Path[] result = new Path[list.length];
+ for (int i = 0; i < list.length; i++) {
+ result[i] = new Path(StringUtils.unEscapeString(list[i]));
}
return result;
}
Modified: hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/util/StringUtils.java Thu Mar 27 17:52:11 2008
@@ -18,6 +18,7 @@
package org.apache.hadoop.util;
+import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
@@ -276,6 +277,142 @@
return values.toArray(new String[values.size()]);
}
+ final public static char COMMA = ',';
+ final public static String COMMA_STR = ",";
+ final public static char ESCAPE_CHAR = '\\';
+
+ /**
+ * Split a string using the default separator
+ * @param str a string that may have escaped separator
+ * @return an array of strings
+ */
+ public static String[] split(String str) {
+ return split(str, ESCAPE_CHAR, COMMA);
+ }
+
+ /**
+ * Split a string using the given separator
+ * @param str a string that may have escaped separator
+ * @param escapeChar a char that be used to escape the separator
+ * @param separator a separator char
+ * @return an array of strings
+ */
+ public static String[] split(
+ String str, char escapeChar, char separator) {
+ if (str==null) {
+ return null;
+ }
+ ArrayList<String> strList = new ArrayList<String>();
+ StringBuilder split = new StringBuilder();
+ int numPreEscapes = 0;
+ for (int i=0; i<str.length(); i++) {
+ char curChar = str.charAt(i);
+ if (numPreEscapes==0 && curChar == separator) { // separator
+ strList.add(split.toString());
+ split.setLength(0); // clear the split
+ } else {
+ split.append(curChar);
+ numPreEscapes = (curChar == escapeChar)?(++numPreEscapes)%2:0;
+ }
+ }
+ strList.add(split.toString());
+ // remove trailing empty split(s)
+ int last = strList.size(); // last split
+ while (--last>=0 && "".equals(strList.get(last))) {
+ strList.remove(last);
+ }
+ return strList.toArray(new String[strList.size()]);
+ }
+
+ /**
+ * Escape commas in the string using the default escape char
+ * @param str a string
+ * @return an escaped string
+ */
+ public static String escapeString(String str) {
+ return escapeString(str, ESCAPE_CHAR, COMMA);
+ }
+
+ /**
+ * Escape <code>charToEscape</code> in the string
+ * with the escape char <code>escapeChar</code>
+ *
+ * @param str string
+ * @param escapeChar escape char
+ * @param charToEscape the char to be escaped
+ * @return an escaped string
+ */
+ public static String escapeString(
+ String str, char escapeChar, char charToEscape) {
+ if (str == null) {
+ return null;
+ }
+ StringBuilder result = new StringBuilder();
+ for (int i=0; i<str.length(); i++) {
+ char curChar = str.charAt(i);
+ if (curChar == escapeChar || curChar == charToEscape) {
+ // special char
+ result.append(escapeChar);
+ }
+ result.append(curChar);
+ }
+ return result.toString();
+ }
+
+ /**
+ * Unescape commas in the string using the default escape char
+ * @param str a string
+ * @return an unescaped string
+ */
+ public static String unEscapeString(String str) {
+ return unEscapeString(str, ESCAPE_CHAR, COMMA);
+ }
+
+ /**
+ * Unescape <code>charToEscape</code> in the string
+ * with the escape char <code>escapeChar</code>
+ *
+ * @param str string
+ * @param escapeChar escape char
+ * @param charToEscape the escaped char
+ * @return an unescaped string
+ */
+ public static String unEscapeString(
+ String str, char escapeChar, char charToEscape) {
+ if (str == null) {
+ return null;
+ }
+ StringBuilder result = new StringBuilder(str.length());
+ boolean hasPreEscape = false;
+ for (int i=0; i<str.length(); i++) {
+ char curChar = str.charAt(i);
+ if (hasPreEscape) {
+ if (curChar != escapeChar && curChar != charToEscape) {
+ // no special char
+ throw new IllegalArgumentException("Illegal escaped string " + str +
+ " unescaped " + escapeChar + " at " + (i-1));
+ }
+ // otherwise discard the escape char
+ result.append(curChar);
+ hasPreEscape = false;
+ } else {
+ if (curChar == charToEscape) {
+ throw new IllegalArgumentException("Illegal escaped string " + str +
+ " unescaped " + charToEscape + " at " + i);
+ } else if (curChar == escapeChar) {
+ hasPreEscape = true;
+ } else {
+ result.append(curChar);
+ }
+ }
+ }
+ if (hasPreEscape ) {
+ throw new IllegalArgumentException("Illegal escaped string " + str +
+ ", not expecting " + charToEscape + " in the end." );
+ }
+ return result.toString();
+ }
+
/**
* Return hostname without throwing exception.
* @return hostname
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java?rev=642053&r1=642052&r2=642053&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/conf/TestJobConf.java Thu Mar 27 17:52:11 2008
@@ -19,7 +19,10 @@
import junit.framework.Assert;
import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.StringUtils;
public class TestJobConf extends TestCase {
@@ -49,5 +52,39 @@
Assert.assertEquals("test", configuration.getProfileParams());
}
-
+ public void testInputPath() throws Exception {
+ JobConf jobConf = new JobConf();
+ Path path = new Path(jobConf.getWorkingDirectory(),
+ "xx{y"+StringUtils.COMMA_STR+"z}");
+ jobConf.setInputPath(path);
+ Path[] paths = jobConf.getInputPaths();
+ assertEquals(1, paths.length);
+ assertEquals(path.toString(), paths[0].toString());
+
+ StringBuilder pathStr = new StringBuilder();
+ pathStr.append(StringUtils.ESCAPE_CHAR);
+ pathStr.append(StringUtils.ESCAPE_CHAR);
+ pathStr.append(StringUtils.COMMA);
+ pathStr.append(StringUtils.COMMA);
+ pathStr.append('a');
+ path = new Path(jobConf.getWorkingDirectory(), pathStr.toString());
+ jobConf.setInputPath(path);
+ paths = jobConf.getInputPaths();
+ assertEquals(1, paths.length);
+ assertEquals(path.toString(), paths[0].toString());
+
+ pathStr.setLength(0);
+ pathStr.append(StringUtils.ESCAPE_CHAR);
+ pathStr.append("xx");
+ pathStr.append(StringUtils.ESCAPE_CHAR);
+ path = new Path(jobConf.getWorkingDirectory(), pathStr.toString());
+ Path path1 = new Path(jobConf.getWorkingDirectory(),
+ "yy"+StringUtils.COMMA_STR+"zz");
+ jobConf.setInputPath(path);
+ jobConf.addInputPath(path1);
+ paths = jobConf.getInputPaths();
+ assertEquals(2, paths.length);
+ assertEquals(path.toString(), paths[0].toString());
+ assertEquals(path1.toString(), paths[1].toString());
+ }
}
Added: hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java?rev=642053&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/util/TestStringUtils.java Thu Mar 27 17:52:11 2008
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import junit.framework.TestCase;
+
+public class TestStringUtils extends TestCase {
+ final private static String NULL_STR = null;
+ final private static String EMPTY_STR = "";
+ final private static String STR_WO_SPECIAL_CHARS = "AB";
+ final private static String STR_WITH_COMMA = "A,B";
+ final private static String ESCAPED_STR_WITH_COMMA = "A\\,B";
+ final private static String STR_WITH_ESCAPE = "AB\\";
+ final private static String ESCAPED_STR_WITH_ESCAPE = "AB\\\\";
+ final private static String STR_WITH_BOTH2 = ",A\\,,B\\\\,";
+ final private static String ESCAPED_STR_WITH_BOTH2 =
+ "\\,A\\\\\\,\\,B\\\\\\\\\\,";
+
+ public void testEscapeString() throws Exception {
+ assertEquals(NULL_STR, StringUtils.escapeString(NULL_STR));
+ assertEquals(EMPTY_STR, StringUtils.escapeString(EMPTY_STR));
+ assertEquals(STR_WO_SPECIAL_CHARS,
+ StringUtils.escapeString(STR_WO_SPECIAL_CHARS));
+ assertEquals(ESCAPED_STR_WITH_COMMA,
+ StringUtils.escapeString(STR_WITH_COMMA));
+ assertEquals(ESCAPED_STR_WITH_ESCAPE,
+ StringUtils.escapeString(STR_WITH_ESCAPE));
+ assertEquals(ESCAPED_STR_WITH_BOTH2,
+ StringUtils.escapeString(STR_WITH_BOTH2));
+ }
+
+ public void testSplit() throws Exception {
+ assertEquals(NULL_STR, StringUtils.split(NULL_STR));
+ String[] splits = StringUtils.split(EMPTY_STR);
+ assertEquals(0, splits.length);
+ splits = StringUtils.split(",,");
+ assertEquals(0, splits.length);
+ splits = StringUtils.split(STR_WO_SPECIAL_CHARS);
+ assertEquals(1, splits.length);
+ assertEquals(STR_WO_SPECIAL_CHARS, splits[0]);
+ splits = StringUtils.split(STR_WITH_COMMA);
+ assertEquals(2, splits.length);
+ assertEquals("A", splits[0]);
+ assertEquals("B", splits[1]);
+ splits = StringUtils.split(ESCAPED_STR_WITH_COMMA);
+ assertEquals(1, splits.length);
+ assertEquals(ESCAPED_STR_WITH_COMMA, splits[0]);
+ splits = StringUtils.split(STR_WITH_ESCAPE);
+ assertEquals(1, splits.length);
+ assertEquals(STR_WITH_ESCAPE, splits[0]);
+ splits = StringUtils.split(STR_WITH_BOTH2);
+ assertEquals(3, splits.length);
+ assertEquals(EMPTY_STR, splits[0]);
+ assertEquals("A\\,", splits[1]);
+ assertEquals("B\\\\", splits[2]);
+ splits = StringUtils.split(ESCAPED_STR_WITH_BOTH2);
+ assertEquals(1, splits.length);
+ assertEquals(ESCAPED_STR_WITH_BOTH2, splits[0]);
+ }
+
+ public void testUnescapeString() throws Exception {
+ assertEquals(NULL_STR, StringUtils.unEscapeString(NULL_STR));
+ assertEquals(EMPTY_STR, StringUtils.unEscapeString(EMPTY_STR));
+ assertEquals(STR_WO_SPECIAL_CHARS,
+ StringUtils.unEscapeString(STR_WO_SPECIAL_CHARS));
+ try {
+ StringUtils.unEscapeString(STR_WITH_COMMA);
+ fail("Should throw IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ assertEquals(STR_WITH_COMMA,
+ StringUtils.unEscapeString(ESCAPED_STR_WITH_COMMA));
+ try {
+ StringUtils.unEscapeString(STR_WITH_ESCAPE);
+ fail("Should throw IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ assertEquals(STR_WITH_ESCAPE,
+ StringUtils.unEscapeString(ESCAPED_STR_WITH_ESCAPE));
+ try {
+ StringUtils.unEscapeString(STR_WITH_BOTH2);
+ fail("Should throw IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ assertEquals(STR_WITH_BOTH2,
+ StringUtils.unEscapeString(ESCAPED_STR_WITH_BOTH2));
+ }
+}
\ No newline at end of file