You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ni...@apache.org on 2011/01/09 02:41:57 UTC
svn commit: r1056862 - in /commons/proper/lang/branches/LANG_2_X/src:
main/java/org/apache/commons/lang/StringUtils.java
test/java/org/apache/commons/lang/StringUtilsTest.java
Author: niallp
Date: Sun Jan 9 01:41:56 2011
New Revision: 1056862
URL: http://svn.apache.org/viewvc?rev=1056862&view=rev
Log:
Port LANG-640 to LANG 2.x Branch - add normalizeSpace() to StringUtils
Modified:
commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java
commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java
Modified: commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java?rev=1056862&r1=1056861&r2=1056862&view=diff
==============================================================================
--- commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java (original)
+++ commons/proper/lang/branches/LANG_2_X/src/main/java/org/apache/commons/lang/StringUtils.java Sun Jan 9 01:41:56 2011
@@ -6479,4 +6479,54 @@ public class StringUtils {
int strOffset = str.length() - suffix.length();
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
}
+
+ /**
+  * <p>
+  * Normalizes the whitespace of a String, similar to the XPath <a
+  * href="http://www.w3.org/TR/xpath/#function-normalize-space">normalize-space</a> function:
+  * whitespace at both ends is removed and every inner run of whitespace becomes one space.
+  * </p>
+  * <p>
+  * The function returns the argument string with whitespace normalized by using
+  * <code>{@link #trim(String)}</code> to remove leading and trailing whitespace
+  * and then replacing sequences of whitespace characters by a single space.
+  * </p>
+  * In XML, whitespace characters are the same as those allowed by the <a
+  * href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+
+  * <p>
+  * See Java's {@link Character#isWhitespace(char)} for which characters are considered whitespace.
+  * <p>
+  * The difference is that Java's whitespace includes vertical tab and form feed, which this function will also
+  * normalize. Additionally <code>{@link #trim(String)}</code> removes control characters (char &lt;= 32) from both
+  * ends of this String; whitespace above that range is dropped from both ends as well.
+  * </p>
+  *
+  * @see Character#isWhitespace(char)
+  * @see #trim(String)
+  * @see <a href="http://www.w3.org/TR/xpath/#function-normalize-space">
+  * http://www.w3.org/TR/xpath/#function-normalize-space</a>
+  * @param str the source String to normalize whitespaces from, may be null
+  * @return the modified string with whitespace normalized, <code>null</code> if null String input
+  *
+  * @since 2.6
+  */
+ public static String normalizeSpace(String str) {
+     if (str == null) {
+         return null;
+     }
+     str = trim(str);
+     StringBuffer b = new StringBuffer(str.length());
+     for (int i = 0; i < str.length(); i++) {
+         char c = str.charAt(i);
+         if (!Character.isWhitespace(c)) {
+             // Emit the separator lazily so whitespace that trim() keeps (above U+0020) never trails.
+             if (b.length() > 0 && Character.isWhitespace(str.charAt(i - 1))) {
+                 b.append(' ');
+             }
+             b.append(c);
+         }
+     }
+     return b.toString();
+ }
+
}
Modified: commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java?rev=1056862&r1=1056861&r2=1056862&view=diff
==============================================================================
--- commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java (original)
+++ commons/proper/lang/branches/LANG_2_X/src/test/java/org/apache/commons/lang/StringUtilsTest.java Sun Jan 9 01:41:56 2011
@@ -1910,6 +1910,26 @@ public class StringUtilsTest extends Tes
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
}
+ public void testNormalizeSpace() { // covers null passthrough, blank-only inputs, then trimming and collapsing
+ assertEquals(null, StringUtils.normalizeSpace(null)); // null in, null out
+ assertEquals("", StringUtils.normalizeSpace("")); // empty stays empty
+ assertEquals("", StringUtils.normalizeSpace(" ")); // inputs made only of whitespace normalize to ""
+ assertEquals("", StringUtils.normalizeSpace("\t"));
+ assertEquals("", StringUtils.normalizeSpace("\n"));
+ assertEquals("", StringUtils.normalizeSpace("\u0009")); // U+0009 is the tab again, written as a Unicode escape
+ assertEquals("", StringUtils.normalizeSpace("\u000B")); // U+000B vertical tab
+ assertEquals("", StringUtils.normalizeSpace("\u000C")); // U+000C form feed
+ assertEquals("", StringUtils.normalizeSpace("\u001C")); // U+001C file separator
+ assertEquals("", StringUtils.normalizeSpace("\u001D")); // U+001D group separator
+ assertEquals("", StringUtils.normalizeSpace("\u001E")); // U+001E record separator
+ assertEquals("", StringUtils.normalizeSpace("\u001F")); // U+001F unit separator
+ assertEquals("", StringUtils.normalizeSpace("\f")); // form feed again, via the character escape
+ assertEquals("", StringUtils.normalizeSpace("\r"));
+ assertEquals("a", StringUtils.normalizeSpace(" a ")); // leading and trailing whitespace removed
+ assertEquals("a b c", StringUtils.normalizeSpace(" a b c "));
+ assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n")); // mixed whitespace runs collapse to one space
+ }
+
public void testLANG666() {
assertEquals("12",StringUtils.stripEnd("120.00", ".0"));
assertEquals("121",StringUtils.stripEnd("121.00", ".0"));