You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2010/05/05 23:46:32 UTC
svn commit: r941508 - in /hadoop/common/trunk: CHANGES.txt
src/java/org/apache/hadoop/util/StringUtils.java
src/test/core/org/apache/hadoop/util/TestStringUtils.java
Author: tomwhite
Date: Wed May 5 21:46:31 2010
New Revision: 941508
URL: http://svn.apache.org/viewvc?rev=941508&view=rev
Log:
HADOOP-6623. Add StringUtils.split for non-escaped single-character separator. Contributed by Todd Lipcon.
Modified:
hadoop/common/trunk/CHANGES.txt
hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java
hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java
Modified: hadoop/common/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=941508&r1=941507&r2=941508&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Wed May 5 21:46:31 2010
@@ -2,6 +2,11 @@ Hadoop Change Log
Trunk (unreleased changes)
+ IMPROVEMENTS
+
+ HADOOP-6623. Add StringUtils.split for non-escaped single-character
+ separator. (Todd Lipcon via tomwhite)
+
BUG FIXES
HADOOP-6730. Bug in FileContext#copy and provide base class for FileContext
Modified: hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java?rev=941508&r1=941507&r2=941508&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java Wed May 5 21:46:31 2010
@@ -383,6 +383,35 @@ public class StringUtils {
}
return strList.toArray(new String[strList.size()]);
}
+
+ /**
+ * Split a string on every occurrence of the given separator, with no escaping performed.
+ * @param str a string to be split. Note that this must not be null.
+ * @param separator a separator char
+ * @return an array of strings; leading empty strings are kept, trailing empty strings are removed
+ */
+ public static String[] split(
+ String str, char separator) {
+ // String.split returns a single empty result for splitting the empty
+ // string, so return the same here before trailing empties get stripped.
+ if ("".equals(str)) {
+ return new String[]{""};
+ }
+ ArrayList<String> strList = new ArrayList<String>();
+ int startIndex = 0;
+ int nextIndex = 0;
+ while ((nextIndex = str.indexOf((int)separator, startIndex)) != -1) {
+ strList.add(str.substring(startIndex, nextIndex));
+ startIndex = nextIndex + 1;
+ }
+ strList.add(str.substring(startIndex));
+ // remove trailing empty split(s); an input of only separators yields an empty array
+ int last = strList.size(); // one past the last split; pre-decremented in the loop condition
+ while (--last>=0 && "".equals(strList.get(last))) {
+ strList.remove(last);
+ }
+ return strList.toArray(new String[strList.size()]);
+ }
/**
* Finds the first occurrence of the separator character ignoring the escaped
Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java?rev=941508&r1=941507&r2=941508&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java Wed May 5 21:46:31 2010
@@ -78,6 +78,21 @@ public class TestStringUtils extends Tes
assertEquals(ESCAPED_STR_WITH_BOTH2, splits[0]);
}
+ public void testSimpleSplit() throws Exception {
+ final String[] TO_TEST = {
+ "a/b/c",
+ "a/b/c////", // trailing separators
+ "///a/b/c", // leading separators
+ "", // empty input
+ "/", // a single separator and nothing else
+ "////"}; // several separators and nothing else
+ for (String testSubject : TO_TEST) { // String.split("/") supplies the expected result for each input
+ assertArrayEquals("Testing '" + testSubject + "'",
+ testSubject.split("/"),
+ StringUtils.split(testSubject, '/'));
+ }
+ }
+
public void testUnescapeString() throws Exception {
assertEquals(NULL_STR, StringUtils.unEscapeString(NULL_STR));
assertEquals(EMPTY_STR, StringUtils.unEscapeString(EMPTY_STR));
@@ -188,4 +203,32 @@ public class TestStringUtils extends Tes
assertEquals("Yy", StringUtils.camelize("yY"));
assertEquals("Zz", StringUtils.camelize("zZ"));
}
+
+ // Benchmark for StringUtils split: 10 rounds of 1,000,000 splits, first with String.split and then with StringUtils.split
+ public static void main(String []args) {
+ final String TO_SPLIT = "foo,bar,baz,blah,blah";
+ for (boolean useOurs : new boolean[] { false, true }) {
+ for (int outer=0; outer < 10; outer++) {
+ long st = System.nanoTime();
+ int components = 0;
+ for (int inner=0; inner < 1000000; inner++) {
+ String[] res;
+ if (useOurs) {
+ res = StringUtils.split(TO_SPLIT, ',');
+ } else {
+ res = TO_SPLIT.split(",");
+ }
+ // be sure to use res, otherwise might be optimized out. NOTE(review): components is never read after this loop - confirm that is enough
+ components += res.length;
+ }
+ long et = System.nanoTime();
+ if (outer > 3) { // only rounds 4-9 are printed; earlier rounds are presumably treated as warm-up
+ System.out.println(
+ (useOurs ? "StringUtils impl" : "Java impl") +
+ " #" + outer + ":" +
+ (et - st)/1000000 + "ms"); // nanoseconds to whole milliseconds (integer division)
+ }
+ }
+ }
+ }
}