You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2010/05/05 23:46:32 UTC

svn commit: r941508 - in /hadoop/common/trunk: CHANGES.txt src/java/org/apache/hadoop/util/StringUtils.java src/test/core/org/apache/hadoop/util/TestStringUtils.java

Author: tomwhite
Date: Wed May  5 21:46:31 2010
New Revision: 941508

URL: http://svn.apache.org/viewvc?rev=941508&view=rev
Log:
HADOOP-6623. Add StringUtils.split for non-escaped single-character separator. Contributed by Todd Lipcon.

Modified:
    hadoop/common/trunk/CHANGES.txt
    hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java
    hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java

Modified: hadoop/common/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=941508&r1=941507&r2=941508&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Wed May  5 21:46:31 2010
@@ -2,6 +2,11 @@ Hadoop Change Log
 
 Trunk (unreleased changes)
 
+  IMPROVEMENTS
+
+    HADOOP-6623. Add StringUtils.split for non-escaped single-character
+    separator. (Todd Lipcon via tomwhite)
+
   BUG FIXES
 
     HADOOP-6730. Bug in FileContext#copy and provide base class for FileContext 

Modified: hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java?rev=941508&r1=941507&r2=941508&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/util/StringUtils.java Wed May  5 21:46:31 2010
@@ -383,6 +383,35 @@ public class StringUtils {
     }
     return strList.toArray(new String[strList.size()]);
   }
+
+  /**
+   * Split a string using the given separator, with no escaping performed.
+   * @param str a string to be split. Note that this must not be null.
+   * @param separator a separator char
+   * @return an array of strings, with trailing empty strings removed
+   */
+  public static String[] split(
+      String str, char separator) {
+    // String.split returns a single empty result for splitting the empty
+    // string.
+    if ("".equals(str)) {
+      return new String[]{""};
+    }
+    ArrayList<String> strList = new ArrayList<String>();
+    int startIndex = 0;
+    int nextIndex = 0;
+    while ((nextIndex = str.indexOf((int)separator, startIndex)) != -1) {
+      strList.add(str.substring(startIndex, nextIndex));
+      startIndex = nextIndex + 1;
+    }
+    strList.add(str.substring(startIndex));
+    // remove trailing empty split(s)
+    int last = strList.size(); // last split
+    while (--last>=0 && "".equals(strList.get(last))) {
+      strList.remove(last);
+    }
+    return strList.toArray(new String[strList.size()]);
+  }
   
   /**
    * Finds the first occurrence of the separator character ignoring the escaped

Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java?rev=941508&r1=941507&r2=941508&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/util/TestStringUtils.java Wed May  5 21:46:31 2010
@@ -78,6 +78,21 @@ public class TestStringUtils extends Tes
     assertEquals(ESCAPED_STR_WITH_BOTH2, splits[0]);    
   }
   
+  public void testSimpleSplit() throws Exception {
+    final String[] TO_TEST = {
+        "a/b/c",
+        "a/b/c////",
+        "///a/b/c",
+        "",
+        "/",
+        "////"};
+    for (String testSubject : TO_TEST) {
+      assertArrayEquals("Testing '" + testSubject + "'",
+        testSubject.split("/"),
+        StringUtils.split(testSubject, '/'));
+    }
+  }
+
   public void testUnescapeString() throws Exception {
     assertEquals(NULL_STR, StringUtils.unEscapeString(NULL_STR));
     assertEquals(EMPTY_STR, StringUtils.unEscapeString(EMPTY_STR));
@@ -188,4 +203,32 @@ public class TestStringUtils extends Tes
     assertEquals("Yy", StringUtils.camelize("yY"));
     assertEquals("Zz", StringUtils.camelize("zZ"));
   }
+
+  // Benchmark for StringUtils split
+  public static void main(String []args) {
+    final String TO_SPLIT = "foo,bar,baz,blah,blah";
+    for (boolean useOurs : new boolean[] { false, true }) {
+      for (int outer=0; outer < 10; outer++) {
+        long st = System.nanoTime();
+        int components = 0;
+        for (int inner=0; inner < 1000000; inner++) {
+          String[] res;
+          if (useOurs) {
+            res = StringUtils.split(TO_SPLIT, ',');
+          } else {
+            res = TO_SPLIT.split(",");
+          }
+          // be sure to use res, otherwise the split might be optimized out
+          components += res.length;
+        }
+        long et = System.nanoTime();
+        if (outer > 3) {
+          System.out.println(
+            (useOurs ? "StringUtils impl" : "Java impl") +
+            " #" + outer + ":" +
+            (et - st)/1000000 + "ms");
+        }
+      }
+    }
+  }
 }