You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by jc...@apache.org on 2010/08/06 03:08:58 UTC
svn commit: r982844 - in /commons/proper/lang/trunk/src:
main/java/org/apache/commons/lang3/StringUtils.java
test/java/org/apache/commons/lang3/StringUtilsTest.java
Author: jcarman
Date: Fri Aug 6 01:08:57 2010
New Revision: 982844
URL: http://svn.apache.org/viewvc?rev=982844&view=rev
Log:
LANG-640: Add normalizeSpace to StringUtils
Modified:
commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java
commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java?rev=982844&r1=982843&r2=982844&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java (original)
+++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java Fri Aug 6 01:08:57 2010
@@ -20,6 +20,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
+import java.util.regex.Pattern;
/**
* <p>Operations on {@link java.lang.String} that are
@@ -157,6 +158,11 @@ public class StringUtils {
private static final int PAD_LIMIT = 8192;
/**
+ * A regex pattern for recognizing blocks of whitespace characters.
+ */
+ private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+");
+
+ /**
* <p><code>StringUtils</code> instances should NOT be constructed in
* standard programming. Instead, the class should be used as
* <code>StringUtils.trim(" foo ");</code>.</p>
@@ -6257,4 +6263,52 @@ public class StringUtils {
int strOffset = str.length() - suffix.length();
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
}
+
+ /**
+ * <p>
+ * Similar to <a
+ * href="http://www.w3.org/TR/xpath/#function-normalize-space">
+ * http://www.w3.org/TR/xpath/#function-normalize-space</a>
+ * </p>
+ * <p>
+ * The function returns the argument string with whitespace normalized by using
+ * <code>{@link #trim(String)}</code> to remove leading and trailing whitespace
+ * and then replacing sequences of whitespace characters by a single space.
+ * </p>
+ * <p>In XML, whitespace characters are the same as those allowed by the <a
+ * href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+
+ * </p><p>
+ * Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r]
+ * </p><p>
+ * For reference:
+ * <ul>
+ * <li>\x0B = vertical tab</li>
+ * <li>\f = #xC = form feed</li>
+ * <li>#x20 = space</li>
+ * <li>#x9 = \t</li>
+ * <li>#xA = \n</li>
+ * <li>#xD = \r</li>
+ * </ul>
+ * </p>
+ * <p>
+ * The difference is that Java's whitespace includes vertical tab and form feed, which this function will also
+ * normalize. Additionally <code>{@link #trim(String)}</code> removes control characters (char &lt;= 32) from both
+ * ends of this String.
+ * </p>
+ *
+ * @see Pattern
+ * @see #trim(String)
+ * @see <a
+ * href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a>
+ * @param str the source String to normalize whitespaces from, may be null
+ * @return the modified string with whitespace normalized, <code>null</code> if null String input
+ *
+ * @since 3.0
+ */
+ public static String normalizeSpace(String str) {
+ if(str == null) {
+ return null;
+ }
+ return WHITESPACE_BLOCK.matcher(trim(str)).replaceAll(" ");
+ }
}
Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java?rev=982844&r1=982843&r2=982844&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java (original)
+++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java Fri Aug 6 01:08:57 2010
@@ -1855,4 +1855,17 @@ public class StringUtilsTest extends Tes
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
}
+ public void testNormalizeSpace() {
+ assertEquals(null, StringUtils.normalizeSpace(null));
+ assertEquals("", StringUtils.normalizeSpace(""));
+ assertEquals("", StringUtils.normalizeSpace(" "));
+ assertEquals("", StringUtils.normalizeSpace("\t"));
+ assertEquals("", StringUtils.normalizeSpace("\n"));
+ assertEquals("", StringUtils.normalizeSpace("\u000B"));
+ assertEquals("", StringUtils.normalizeSpace("\f"));
+ assertEquals("", StringUtils.normalizeSpace("\r"));
+ assertEquals("a", StringUtils.normalizeSpace(" a "));
+ assertEquals("a b c", StringUtils.normalizeSpace(" a b c "));
+ assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n"));
+ }
}