You are viewing a plain text version of this content. The canonical link for it is here.
Posted to axis-cvs@ws.apache.org by to...@apache.org on 2005/02/15 10:36:51 UTC
cvs commit: ws-axis/java/src/org/apache/axis/utils StringUtils.java
toshi 2005/02/15 01:36:51
Modified: java/src/org/apache/axis/utils StringUtils.java
Log:
Clean up to remain independent from the commons-lang code.
Feel free to add a patch, if it's not much to look at.
Revision Changes Path
1.8 +40 -61 ws-axis/java/src/org/apache/axis/utils/StringUtils.java
Index: StringUtils.java
===================================================================
RCS file: /home/cvs/ws-axis/java/src/org/apache/axis/utils/StringUtils.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- StringUtils.java 14 Feb 2005 13:17:13 -0000 1.7
+++ StringUtils.java 15 Feb 2005 09:36:51 -0000 1.8
@@ -312,21 +312,21 @@
}
/**
- * <p>Unescapes any Java literals found in the <code>String</code>.
- * For example, it will turn a sequence of <code>'\'</code> and
- * <code>'n'</code> into a newline character, unless the <code>'\'</code>
- * is preceded by another <code>'\'</code>.</p>
+ * <p>Unescapes numeric character references found in the <code>String</code>.</p>
+ *
+ * <p>For example, it will turn the numeric character references
+ * "&amp;#x3088;&amp;#x3046;&amp;#x3053;&amp;#x305d;" into the Unicode string "ようこそ".</p>
*
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
*/
- public static String unescapeJava(String str) {
+ public static String unescapeNumericChar(String str) {
if (str == null) {
return null;
}
try {
StringWriter writer = new StringWriter(str.length());
- unescapeJava(writer, str);
+ unescapeNumericChar(writer, str);
return writer.toString();
} catch (IOException ioe) {
// this should never ever happen while writing to a StringWriter
@@ -334,14 +334,13 @@
return null;
}
}
-
+
/**
- * <p>Unescapes any Java literals found in the <code>String</code> to a
+ * <p>Unescapes numeric character references found in the <code>String</code> to a
* <code>Writer</code>.</p>
*
- * <p>For example, it will turn a sequence of <code>'\'</code> and
- * <code>'n'</code> into a newline character, unless the <code>'\'</code>
- * is preceded by another <code>'\'</code>.</p>
+ * <p>For example, it will turn the numeric character references
+ * "&amp;#x3088;&amp;#x3046;&amp;#x3053;&amp;#x305d;" into the Unicode string "ようこそ".</p>
*
* <p>A <code>null</code> string input has no effect.</p>
*
@@ -350,87 +349,67 @@
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws java.io.IOException if error occurs on underlying Writer
*/
- public static void unescapeJava(Writer out, String str) throws IOException {
+ public static void unescapeNumericChar(Writer out, String str) throws IOException {
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
}
if (str == null) {
return;
}
+
int sz = str.length();
StringBuffer unicode = new StringBuffer(4);
- boolean hadSlash = false;
+ StringBuffer escapes = new StringBuffer(3);
boolean inUnicode = false;
+
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
if (inUnicode) {
// if in unicode, then we're reading unicode
// values in somehow
unicode.append(ch);
- if (unicode.length() == 4) {
+ if (unicode.length() == 4 && str.charAt(i+1) == ';') {
// unicode now contains the four hex digits
// which represents our unicode character
try {
int value = Integer.parseInt(unicode.toString(), 16);
out.write((char) value);
unicode.setLength(0);
+ // need to skip the delimiter - ';'
+ i = i + 1;
inUnicode = false;
- hadSlash = false;
} catch (NumberFormatException nfe) {
throw new InternalException(nfe);
}
+ } else if (unicode.length() == 4) {
+ // can't find the delimiter ';', thus it's an invalid unicode
+ out.write(unicode.toString());
+ unicode.setLength(0);
+ inUnicode = false;
}
continue;
- }
- if (hadSlash) {
- // handle an escaped value
- hadSlash = false;
- switch (ch) {
- case '\\':
- out.write('\\');
- break;
- case '\'':
- out.write('\'');
- break;
- case '\"':
- out.write('"');
- break;
- case 'r':
- out.write('\r');
- break;
- case 'f':
- out.write('\f');
- break;
- case 't':
- out.write('\t');
- break;
- case 'n':
- out.write('\n');
- break;
- case 'b':
- out.write('\b');
- break;
- case 'u':
- {
- // uh-oh, we're in unicode country....
- inUnicode = true;
- break;
- }
- default :
- out.write(ch);
- break;
+ } else if (ch=='&') {
+ // Start of the escape sequence ...
+ // A numeric character reference needs at least 8 characters to
+ // describe a Unicode character, e.g. "&#xFFFF;"
+ if (i+7 <= sz) {
+ escapes.append(ch);
+ escapes.append(str.charAt(i+1));
+ escapes.append(str.charAt(i+2));
+ if (escapes.toString().equals("&#x")) {
+ inUnicode = true;
+ } else {
+ out.write(escapes.toString());
+ }
+ escapes.setLength(0);
+ // need to skip the escaping chars - '&#x'
+ i = i + 2;
+ } else {
+ out.write(ch);
}
continue;
- } else if (ch == '\\') {
- hadSlash = true;
- continue;
}
out.write(ch);
}
- if (hadSlash) {
- // then we're in the weird case of a \ at the end of the
- // string, let's output it anyway.
- out.write('\\');
- }
}
}