You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ba...@apache.org on 2009/03/01 21:54:40 UTC
svn commit: r749095 - in /commons/proper/lang/trunk/src:
java/org/apache/commons/lang/Entities.java
test/org/apache/commons/lang/StringEscapeUtilsTest.java
Author: bayard
Date: Sun Mar 1 20:54:40 2009
New Revision: 749095
URL: http://svn.apache.org/viewvc?rev=749095&view=rev
Log:
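Applying Alexander Kjall's patch from LANG-480, along with a unit test made from his example. Fixes escaping of Unicode code points above U+FFFF, which were previously written as two numeric entities (one per UTF-16 surrogate) instead of a single entity for the whole code point.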
Modified:
commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java
commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java
Modified: commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java?rev=749095&r1=749094&r2=749095&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java (original)
+++ commons/proper/lang/trunk/src/java/org/apache/commons/lang/Entities.java Sun Mar 1 20:54:40 2009
@@ -825,10 +825,15 @@
public void escape(Writer writer, String str) throws IOException {
int len = str.length();
for (int i = 0; i < len; i++) {
- char c = str.charAt(i);
+ int c = Character.codePointAt(str, i);
String entityName = this.entityName(c);
if (entityName == null) {
- if (c > 0x7F) {
+ if (c >= 0x010000 && i < len - 1) {
+ writer.write("&#");
+ writer.write(Integer.toString(c, 10));
+ writer.write(';');
+ i++;
+ } else if (c > 0x7F) {
writer.write("&#");
writer.write(Integer.toString(c, 10));
writer.write(';');
Modified: commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java
URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java?rev=749095&r1=749094&r2=749095&view=diff
==============================================================================
--- commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java (original)
+++ commons/proper/lang/trunk/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java Sun Mar 1 20:54:40 2009
@@ -415,4 +415,18 @@
fail("Threw: " + e);
}
}
+
+ // https://issues.apache.org/jira/browse/LANG-480
+ public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingException {
+ // this is the utf8 representation of the character:
+ // COUNTING ROD UNIT DIGIT THREE
+ // in unicode
+ // codepoint: U+1D362
+ byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
+
+ String escaped = StringEscapeUtils.escapeHtml( new String(data, "UTF8") );
+ String unescaped = StringEscapeUtils.unescapeHtml( escaped );
+
+ assertEquals( "High unicode was not escaped correctly", "&#119650;", escaped);
+ }
}