You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ni...@apache.org on 2011/01/09 02:41:57 UTC

svn commit: r1056862 - in /commons/proper/lang/branches/LANG_2_X/src: main/java/org/apache/commons/lang/StringUtils.java test/java/org/apache/commons/lang/StringUtilsTest.java

Author: niallp
Date: Sun Jan  9 01:41:56 2011
New Revision: 1056862

URL: http://svn.apache.org/viewvc?rev=1056862&view=rev
Log:
Port LANG-640 to LANG 2.x Branch - add normalizeSpace() to StringUtils

Modified:
    commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java
    commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java

Modified: commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java?rev=1056862&r1=1056861&r2=1056862&view=diff
==============================================================================
--- commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java (original)
+++ commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java Sun Jan  9 01:41:56 2011
@@ -6479,4 +6479,54 @@ public class StringUtils {
         int strOffset = str.length() - suffix.length();
         return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
     }
+
+    /**
+     * <p>Normalizes the whitespace in a String, similar to the XPath
+     * <a href="http://www.w3.org/TR/xpath/#function-normalize-space">normalize-space</a>
+     * function.</p>
+     *
+     * <p>Leading and trailing whitespace is removed using <code>{@link #trim(String)}</code>,
+     * and every remaining sequence of whitespace characters is replaced by a single space.</p>
+     *
+     * <p>In XML, whitespace characters are those allowed by the
+     * <a href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is
+     * S ::= (#x20 | #x9 | #xD | #xA)+. This function instead uses Java's
+     * {@link Character#isWhitespace(char)}, which also covers vertical tab, form feed and
+     * Unicode space/line/paragraph separators; all of these are normalized. Additionally,
+     * <code>{@link #trim(String)}</code> removes control characters (char &lt;= 32) from
+     * both ends of the String.</p>
+     *
+     * <p>The result never begins or ends with a space, even when the input begins or ends
+     * with whitespace above U+0020 that <code>trim</code> leaves in place.</p>
+     *
+     * @param str the source String to normalize whitespaces from, may be null
+     * @return the modified string with whitespace normalized, <code>null</code> if null String input
+     * @see Character#isWhitespace(char)
+     * @see #trim(String)
+     * @see <a href="http://www.w3.org/TR/xpath/#function-normalize-space">XPath normalize-space</a>
+     * @since 2.6
+     */
+    public static String normalizeSpace(String str) {
+        if (str == null) {
+            return null;
+        }
+        str = trim(str);
+        StringBuffer b = new StringBuffer(str.length());
+        boolean pendingSpace = false;
+        for (int i = 0; i < str.length(); i++) {
+            char c = str.charAt(i);
+            if (Character.isWhitespace(c)) {
+                // Defer the separator: it is written only if a non-whitespace char follows.
+                pendingSpace = b.length() > 0;
+            } else {
+                if (pendingSpace) {
+                    b.append(' ');
+                    pendingSpace = false;
+                }
+                b.append(c);
+            }
+        }
+        return b.toString();
+    }
+
 }

Modified: commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java?rev=1056862&r1=1056861&r2=1056862&view=diff
==============================================================================
--- commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java (original)
+++ commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java Sun Jan  9 01:41:56 2011
@@ -1910,6 +1910,26 @@ public class StringUtilsTest extends Tes
         assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
     }
  
+    public void testNormalizeSpace() {
+        // A null input is handed back as null.
+        assertEquals(null, StringUtils.normalizeSpace(null));
+
+        // The empty string and single whitespace characters all normalize to "".
+        final String[] blankInputs = {
+            "", " ", "\t", "\n", "\u0009", "\u000B", "\u000C",
+            "\u001C", "\u001D", "\u001E", "\u001F", "\f", "\r"
+        };
+        for (int i = 0; i < blankInputs.length; i++) {
+            assertEquals("", StringUtils.normalizeSpace(blankInputs[i]));
+        }
+
+        // Outer whitespace is dropped and inner runs shrink to one space each.
+        final String expected = "a b c";
+        assertEquals("a", StringUtils.normalizeSpace("  a  "));
+        assertEquals(expected, StringUtils.normalizeSpace("  a  b   c  "));
+        assertEquals(expected, StringUtils.normalizeSpace("a\t\f\r  b\u000B   c\n"));
+    }
+
     public void testLANG666() {
         assertEquals("12",StringUtils.stripEnd("120.00", ".0"));
         assertEquals("121",StringUtils.stripEnd("121.00", ".0"));