You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@wookie.apache.org by sc...@apache.org on 2011/05/27 22:27:11 UTC

svn commit: r1128443 - /incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java

Author: scottbw
Date: Fri May 27 20:27:11 2011
New Revision: 1128443

URL: http://svn.apache.org/viewvc?rev=1128443&view=rev
Log:
Fixed a potential issue in the parser with double-encoding XML content, particularly nested text nodes, when recursively extracting text from localized text fields.

Modified:
    incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java

Modified: incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java
URL: http://svn.apache.org/viewvc/incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java?rev=1128443&r1=1128442&r2=1128443&view=diff
==============================================================================
--- incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java (original)
+++ incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java Fri May 27 20:27:11 2011
@@ -13,6 +13,7 @@
  */
 package org.apache.wookie.w3c.impl;
 
+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.wookie.w3c.ILocalizedEntity;
 import org.apache.wookie.w3c.IW3CXMLConfiguration;
 import org.apache.wookie.w3c.util.LocalizationUtils;
@@ -99,8 +100,13 @@ public abstract class AbstractLocalizedE
 					content.append(getLocalizedTextContent((Element)node));
 				}
 			}
+			// Append text to the string
+			// First we have to unescape any XML special characters so we don't
+			// double-encode them (e.g. &acute; => &amp;acute;) when exporting to 
+			// HTML or XML later
 			if (node instanceof Text){
-				content.append(((Text)node).getText());
+			  String text = ((Text)node).getText();
+				content.append(StringEscapeUtils.unescapeXml(text));
 			}
 		}
 		return UnicodeUtils.normalizeWhitespace(content.toString());