You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by jc...@apache.org on 2010/08/06 03:08:58 UTC

svn commit: r982844 - in /commons/proper/lang/trunk/src: main/java/org/apache/commons/lang3/StringUtils.java test/java/org/apache/commons/lang3/StringUtilsTest.java

Author: jcarman
Date: Fri Aug  6 01:08:57 2010
New Revision: 982844

URL: http://svn.apache.org/viewvc?rev=982844&view=rev
Log:
LANG-640: Add normalizeSpace to StringUtils

Modified:
    commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java
    commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java

Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java?rev=982844&r1=982843&r2=982844&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java Fri Aug  6 01:08:57 2010
@@ -20,6 +20,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.regex.Pattern;
 
 /**
  * <p>Operations on {@link java.lang.String} that are
@@ -157,6 +158,11 @@ public class StringUtils {
     private static final int PAD_LIMIT = 8192;
 
     /**
+     * A regex pattern for recognizing blocks of whitespace characters.
+     */
+    private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+");
+    
+    /**
      * <p><code>StringUtils</code> instances should NOT be constructed in
      * standard programming. Instead, the class should be used as
      * <code>StringUtils.trim(" foo ");</code>.</p>
@@ -6257,4 +6263,52 @@ public class StringUtils {
         int strOffset = str.length() - suffix.length();
         return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
     }
+
+    /**
+     * <p>
+     * Similar to <a
+     * href="http://www.w3.org/TR/xpath/#function-normalize-space">
+     * http://www.w3.org/TR/xpath/#function-normalize-space</a>
+     * </p>
+     * <p>
+     * The function returns the argument string with whitespace normalized by using
+     * <code>{@link #trim(String)}</code> to remove leading and trailing whitespace
+     * and then replacing sequences of whitespace characters by a single space.
+     * </p>
+     * In XML, whitespace characters are the same as those allowed by the <a
+     * href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+
+     * <p>
+     * Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r]
+     * <p>
+     * For reference:
+     * <ul>
+     * <li>\x0B = vertical tab</li>
+     * <li>\f = #xC = form feed</li>
+     * <li>#x20 = space</li>
+     * <li>#x9 = \t</li>
+     * <li>#xA = \n</li>
+     * <li>#xD = \r</li>
+     * </ul>
+     * </p>
+     * <p>
+     * The difference is that Java's whitespace includes vertical tab and form feed, which this function will also
+     * normalize. Additionally, <code>{@link #trim(String)}</code> removes control characters (char &lt;= 32) from both
+     * ends of this String.
+     * </p>
+     * 
+     * @see Pattern
+     * @see #trim(String)
+     * @see <a
+     *      href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a>
+     * @param str the source String to normalize whitespaces from, may be null
+     * @return the modified string with whitespace normalized, <code>null</code> if null String input
+     * 
+     * @since 3.0
+     */
+    public static String normalizeSpace(String str) {
+        if(str == null) {
+            return null;
+        }
+        return WHITESPACE_BLOCK.matcher(trim(str)).replaceAll(" ");         
+    }
 }

Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java?rev=982844&r1=982843&r2=982844&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java (original)
+++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java Fri Aug  6 01:08:57 2010
@@ -1855,4 +1855,17 @@ public class StringUtilsTest extends Tes
         assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
     }
  
+    public void testNormalizeSpace() {
+        assertEquals(null, StringUtils.normalizeSpace(null));
+        assertEquals("", StringUtils.normalizeSpace(""));
+        assertEquals("", StringUtils.normalizeSpace(" "));
+        assertEquals("", StringUtils.normalizeSpace("\t"));
+        assertEquals("", StringUtils.normalizeSpace("\n"));        
+        assertEquals("", StringUtils.normalizeSpace("\u000B"));
+        assertEquals("", StringUtils.normalizeSpace("\f"));
+        assertEquals("", StringUtils.normalizeSpace("\r"));
+        assertEquals("a", StringUtils.normalizeSpace("  a  "));
+        assertEquals("a b c", StringUtils.normalizeSpace("  a  b   c  "));
+        assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r  b\u000B   c\n"));
+    }
 }