You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@wookie.apache.org by sc...@apache.org on 2010/04/29 16:14:40 UTC
svn commit: r939317 -
/incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java
Author: scottbw
Date: Thu Apr 29 14:14:40 2010
New Revision: 939317
URL: http://svn.apache.org/viewvc?rev=939317&view=rev
Log:
Simplified the method for normalizing text content in UnicodeUtils, and added more comments. I also replaced the loop that concatenated strings with a StringBuffer for better performance.
Modified:
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java
Modified: incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java
URL: http://svn.apache.org/viewvc/incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java?rev=939317&r1=939316&r2=939317&view=diff
==============================================================================
--- incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java (original)
+++ incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/util/UnicodeUtils.java Thu Apr 29 14:14:40 2010
@@ -48,20 +48,33 @@ public class UnicodeUtils {
return normalize(in, false);
}
+ /**
+ * Normalizes all space characters (and whitespace if includeWhitespace is set to true) in the given string to
+ * U+0020, then collapses multiple adjacent spaces to a single space, and
+ * removes any leading and trailing spaces. If the input string is null,
+ * the method returns an empty string ("")
+ * @param in the string to normalize
+ * @param includeWhitespace set to true to normalize whitespace as well as space characters
+ * @return the normalized string
+ */
private static String normalize(String in, boolean includeWhitespace){
if (in == null) return "";
- String out = "";
+ // Create a buffer for the string
+ StringBuffer buf = new StringBuffer();
+ // Iterate over characters in the string and append them to buffer, replacing matching characters with standard spaces
for (int x=0;x<in.length();x++){
- String s = in.substring(x, x+1);
- char ch = s.charAt(0);
+ char ch = in.charAt(x);
if (Character.isSpaceChar(ch) || (Character.isWhitespace(ch) && includeWhitespace)){
- s = " ";
+ ch = new Character(' ');
}
- out = out + s;
+ buf.append(ch);
}
- out = CharSetUtils.squeeze(out, " ");
- out = StringUtils.strip(out);
- return out;
+ String str = buf.toString();
+ // Squeeze out extra spaces
+ str = CharSetUtils.squeeze(str, " ");
+ // Strip off trailing and leading spaces
+ str = StringUtils.strip(str);
+ return str;
}
}