You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@wicket.apache.org by mg...@apache.org on 2011/05/04 22:24:59 UTC

svn commit: r1099592 - in /wicket/trunk: wicket-core/src/main/java/org/apache/wicket/markup/parser/ wicket-core/src/test/java/org/apache/wicket/markup/ wicket-util/src/main/java/org/apache/wicket/util/string/

Author: mgrigorov
Date: Wed May  4 20:24:59 2011
New Revision: 1099592

URL: http://svn.apache.org/viewvc?rev=1099592&view=rev
Log:
WICKET-3608 input button escapes escaped value

Unescape html entities while reading them. ComponentTag.writeOutput() will encode them when writing.
Uses StringEscapeUtils (only html/xml related methods left) and Entities (non-modified) from commons-lang 2.6.


Added:
    wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java
    wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java   (with props)
    wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java   (with props)
Modified:
    wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java
    wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java
    wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java

Modified: wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java
URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java?rev=1099592&r1=1099591&r2=1099592&view=diff
==============================================================================
--- wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java (original)
+++ wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java Wed May  4 20:24:59 2011
@@ -18,6 +18,7 @@ package org.apache.wicket.markup.parser;
 
 import java.util.Map;
 
+import org.apache.wicket.util.string.Strings;
 import org.apache.wicket.util.value.IValueMap;
 import org.apache.wicket.util.value.ValueMap;
 import org.slf4j.Logger;
@@ -57,7 +58,7 @@ public class TagAttributes extends Value
 	public final Object put(String key, Object value)
 	{
 		checkIdAttribute(key);
-		return super.put(key, value);
+		return putInternal(key, value);
 	}
 
 	/**
@@ -81,7 +82,7 @@ public class TagAttributes extends Value
 	 */
 	public final Object putInternal(String key, Object value)
 	{
-		return super.put(key, value);
+		return super.put(key, unescapeHtml(value));
 	}
 
 	@Override
@@ -95,4 +96,24 @@ public class TagAttributes extends Value
 
 		super.putAll(map);
 	}
+
+	/**
+	 * Unescapes the HTML entities from the <code>value</code> if it is a {@link CharSequence} and
+	 * there are any
+	 * 
+	 * @param value
+	 *            the attribute value
+	 * @return the HTML unescaped value or the non-modified input
+	 */
+	private static final Object unescapeHtml(Object value)
+	{
+		if (value instanceof CharSequence)
+		{
+			return Strings.unescapeMarkup(value.toString());
+		}
+		else
+		{
+			return value;
+		}
+	}
 }
\ No newline at end of file

Modified: wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java
URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java?rev=1099592&r1=1099591&r2=1099592&view=diff
==============================================================================
--- wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java (original)
+++ wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java Wed May  4 20:24:59 2011
@@ -25,7 +25,6 @@ import org.apache.wicket.util.string.App
 import org.apache.wicket.util.string.StringValue;
 import org.apache.wicket.util.string.Strings;
 import org.apache.wicket.util.value.IValueMap;
-import org.apache.wicket.util.value.ValueMap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -354,7 +353,7 @@ public class XmlTag
 		dest.copyOf = copyOf;
 		if (attributes != null)
 		{
-			dest.attributes = new ValueMap(attributes);
+			dest.attributes = new TagAttributes(attributes);
 		}
 	}
 

Added: wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java
URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java?rev=1099592&view=auto
==============================================================================
--- wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java (added)
+++ wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java Wed May  4 20:24:59 2011
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.wicket.markup;
+
+import junit.framework.TestCase;
+
+import org.apache.wicket.MarkupContainer;
+import org.apache.wicket.markup.html.WebPage;
+import org.apache.wicket.markup.html.form.Button;
+import org.apache.wicket.markup.html.link.Link;
+import org.apache.wicket.util.resource.IResourceStream;
+import org.apache.wicket.util.resource.StringResourceStream;
+import org.apache.wicket.util.tester.WicketTester;
+
+/**
+ * @author Pedro Santos
+ */
+public class ComponentTagAttributeEscapingTest extends TestCase
+{
+
+	/**
+	 * @throws Exception
+	 */
+	public void testComponentAttributesNotDoubleEscaped() throws Exception
+	{
+		WicketTester tester = new WicketTester();
+		tester.startPage(ButtonValuePage.class);
+		String response = tester.getLastResponseAsString();
+		System.out.println(response);
+		assertTrue("One of the pound entity representations is missing: &pound; or &#163;",
+			response.contains("££"));
+		assertTrue("must not be double escaped", response.contains("Watch escaped value: &gt;&gt;"));
+		assertTrue("following the last assert logic, this one would true",
+			response.contains("alerting: &amp;"));
+		assertTrue("not double escape manually add attributes",
+			response.contains("some_attribute=\"&amp;\""));
+	}
+
+	/**
+	 * Just two distinct components with escaped characters in markup attribute.
+	 * */
+	public static class ButtonValuePage extends WebPage implements IMarkupResourceStreamProvider
+	{
+		/** */
+		private static final long serialVersionUID = 1L;
+
+		/**
+		 * Construct.
+		 */
+		public ButtonValuePage()
+		{
+			add(new Button("button"));
+			add(new Link<Void>("link")
+			{
+				/** */
+				private static final long serialVersionUID = 1L;
+
+				@Override
+				public void onClick()
+				{
+				}
+
+				@Override
+				protected void onComponentTag(ComponentTag tag)
+				{
+					super.onComponentTag(tag);
+					tag.put("some_attribute", "&amp;");
+				}
+			});
+		}
+
+		public IResourceStream getMarkupResourceStream(MarkupContainer container,
+			Class<?> containerClass)
+		{
+			return new StringResourceStream(
+				"<html>"//
+					+ "<body>"//
+					+ "<a wicket:id=\"link\" onclick=\"alert('alerting: &amp; &pound;&#163; ')\">link</a>"//
+					+ "<input type=\"submit\" wicket:id=\"button\" value=\"Watch escaped value: &gt;&gt;\"/>"//
+					+ "</body>" + //
+					"</html>");
+		}
+	}
+}
\ No newline at end of file

Added: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java
URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java?rev=1099592&view=auto
==============================================================================
--- wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java (added)
+++ wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java Wed May  4 20:24:59 2011
@@ -0,0 +1,1151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.wicket.util.string;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.wicket.util.collections.IntHashMap;
+
+/**
+ * <p>
+ * Provides HTML and XML entity utilities.
+ * </p>
+ * 
+ * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
+ * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
+ *      Latin-1</a>
+ * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
+ *      references</a>
+ * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
+ * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
+ *      positions</a>
+ * 
+ * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
+ * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
+ * @since 2.0
+ * @version $Id$
+ */
+// Copy from commons-lang ver. 2.6. Non-modified.
+class Entities
+{
+
+	private static final String[][] BASIC_ARRAY = { { "quot", "34" }, // " - double-quote
+			{ "amp", "38" }, // & - ampersand
+			{ "lt", "60" }, // < - less-than
+			{ "gt", "62" }, // > - greater-than
+	};
+
+	private static final String[][] APOS_ARRAY = { { "apos", "39" }, // XML apostrophe
+	};
+
+	// package scoped for testing
+	static final String[][] ISO8859_1_ARRAY = { { "nbsp", "160" }, // non-breaking space
+			{ "iexcl", "161" }, // inverted exclamation mark
+			{ "cent", "162" }, // cent sign
+			{ "pound", "163" }, // pound sign
+			{ "curren", "164" }, // currency sign
+			{ "yen", "165" }, // yen sign = yuan sign
+			{ "brvbar", "166" }, // broken bar = broken vertical bar
+			{ "sect", "167" }, // section sign
+			{ "uml", "168" }, // diaeresis = spacing diaeresis
+			{ "copy", "169" }, // © - copyright sign
+			{ "ordf", "170" }, // feminine ordinal indicator
+			{ "laquo", "171" }, // left-pointing double angle quotation mark = left pointing
+// guillemet
+			{ "not", "172" }, // not sign
+			{ "shy", "173" }, // soft hyphen = discretionary hyphen
+			{ "reg", "174" }, // ® - registered trademark sign
+			{ "macr", "175" }, // macron = spacing macron = overline = APL overbar
+			{ "deg", "176" }, // degree sign
+			{ "plusmn", "177" }, // plus-minus sign = plus-or-minus sign
+			{ "sup2", "178" }, // superscript two = superscript digit two = squared
+			{ "sup3", "179" }, // superscript three = superscript digit three = cubed
+			{ "acute", "180" }, // acute accent = spacing acute
+			{ "micro", "181" }, // micro sign
+			{ "para", "182" }, // pilcrow sign = paragraph sign
+			{ "middot", "183" }, // middle dot = Georgian comma = Greek middle dot
+			{ "cedil", "184" }, // cedilla = spacing cedilla
+			{ "sup1", "185" }, // superscript one = superscript digit one
+			{ "ordm", "186" }, // masculine ordinal indicator
+			{ "raquo", "187" }, // right-pointing double angle quotation mark = right pointing
+// guillemet
+			{ "frac14", "188" }, // vulgar fraction one quarter = fraction one quarter
+			{ "frac12", "189" }, // vulgar fraction one half = fraction one half
+			{ "frac34", "190" }, // vulgar fraction three quarters = fraction three quarters
+			{ "iquest", "191" }, // inverted question mark = turned question mark
+			{ "Agrave", "192" }, // À - uppercase A, grave accent
+			{ "Aacute", "193" }, // Á - uppercase A, acute accent
+			{ "Acirc", "194" }, // Â - uppercase A, circumflex accent
+			{ "Atilde", "195" }, // Ã - uppercase A, tilde
+			{ "Auml", "196" }, // Ä - uppercase A, umlaut
+			{ "Aring", "197" }, // Å - uppercase A, ring
+			{ "AElig", "198" }, // Æ - uppercase AE
+			{ "Ccedil", "199" }, // Ç - uppercase C, cedilla
+			{ "Egrave", "200" }, // È - uppercase E, grave accent
+			{ "Eacute", "201" }, // É - uppercase E, acute accent
+			{ "Ecirc", "202" }, // Ê - uppercase E, circumflex accent
+			{ "Euml", "203" }, // Ë - uppercase E, umlaut
+			{ "Igrave", "204" }, // Ì - uppercase I, grave accent
+			{ "Iacute", "205" }, // Í - uppercase I, acute accent
+			{ "Icirc", "206" }, // Î - uppercase I, circumflex accent
+			{ "Iuml", "207" }, // Ï - uppercase I, umlaut
+			{ "ETH", "208" }, // Ð - uppercase Eth, Icelandic
+			{ "Ntilde", "209" }, // Ñ - uppercase N, tilde
+			{ "Ograve", "210" }, // Ò - uppercase O, grave accent
+			{ "Oacute", "211" }, // Ó - uppercase O, acute accent
+			{ "Ocirc", "212" }, // Ô - uppercase O, circumflex accent
+			{ "Otilde", "213" }, // Õ - uppercase O, tilde
+			{ "Ouml", "214" }, // Ö - uppercase O, umlaut
+			{ "times", "215" }, // multiplication sign
+			{ "Oslash", "216" }, // Ø - uppercase O, slash
+			{ "Ugrave", "217" }, // Ù - uppercase U, grave accent
+			{ "Uacute", "218" }, // Ú - uppercase U, acute accent
+			{ "Ucirc", "219" }, // Û - uppercase U, circumflex accent
+			{ "Uuml", "220" }, // Ü - uppercase U, umlaut
+			{ "Yacute", "221" }, // Ý - uppercase Y, acute accent
+			{ "THORN", "222" }, // Þ - uppercase THORN, Icelandic
+			{ "szlig", "223" }, // ß - lowercase sharps, German
+			{ "agrave", "224" }, // à - lowercase a, grave accent
+			{ "aacute", "225" }, // á - lowercase a, acute accent
+			{ "acirc", "226" }, // â - lowercase a, circumflex accent
+			{ "atilde", "227" }, // ã - lowercase a, tilde
+			{ "auml", "228" }, // ä - lowercase a, umlaut
+			{ "aring", "229" }, // å - lowercase a, ring
+			{ "aelig", "230" }, // æ - lowercase ae
+			{ "ccedil", "231" }, // ç - lowercase c, cedilla
+			{ "egrave", "232" }, // è - lowercase e, grave accent
+			{ "eacute", "233" }, // é - lowercase e, acute accent
+			{ "ecirc", "234" }, // ê - lowercase e, circumflex accent
+			{ "euml", "235" }, // ë - lowercase e, umlaut
+			{ "igrave", "236" }, // ì - lowercase i, grave accent
+			{ "iacute", "237" }, // í - lowercase i, acute accent
+			{ "icirc", "238" }, // î - lowercase i, circumflex accent
+			{ "iuml", "239" }, // ï - lowercase i, umlaut
+			{ "eth", "240" }, // ð - lowercase eth, Icelandic
+			{ "ntilde", "241" }, // ñ - lowercase n, tilde
+			{ "ograve", "242" }, // ò - lowercase o, grave accent
+			{ "oacute", "243" }, // ó - lowercase o, acute accent
+			{ "ocirc", "244" }, // ô - lowercase o, circumflex accent
+			{ "otilde", "245" }, // õ - lowercase o, tilde
+			{ "ouml", "246" }, // ö - lowercase o, umlaut
+			{ "divide", "247" }, // division sign
+			{ "oslash", "248" }, // ø - lowercase o, slash
+			{ "ugrave", "249" }, // ù - lowercase u, grave accent
+			{ "uacute", "250" }, // ú - lowercase u, acute accent
+			{ "ucirc", "251" }, // û - lowercase u, circumflex accent
+			{ "uuml", "252" }, // ü - lowercase u, umlaut
+			{ "yacute", "253" }, // ý - lowercase y, acute accent
+			{ "thorn", "254" }, // þ - lowercase thorn, Icelandic
+			{ "yuml", "255" }, // ÿ - lowercase y, umlaut
+	};
+
+	// http://www.w3.org/TR/REC-html40/sgml/entities.html
+	// package scoped for testing
+	static final String[][] HTML40_ARRAY = {
+			// <!-- Latin Extended-B -->
+			{ "fnof", "402" }, // latin small f with hook = function= florin, U+0192 ISOtech -->
+			// <!-- Greek -->
+			{ "Alpha", "913" }, // greek capital letter alpha, U+0391 -->
+			{ "Beta", "914" }, // greek capital letter beta, U+0392 -->
+			{ "Gamma", "915" }, // greek capital letter gamma,U+0393 ISOgrk3 -->
+			{ "Delta", "916" }, // greek capital letter delta,U+0394 ISOgrk3 -->
+			{ "Epsilon", "917" }, // greek capital letter epsilon, U+0395 -->
+			{ "Zeta", "918" }, // greek capital letter zeta, U+0396 -->
+			{ "Eta", "919" }, // greek capital letter eta, U+0397 -->
+			{ "Theta", "920" }, // greek capital letter theta,U+0398 ISOgrk3 -->
+			{ "Iota", "921" }, // greek capital letter iota, U+0399 -->
+			{ "Kappa", "922" }, // greek capital letter kappa, U+039A -->
+			{ "Lambda", "923" }, // greek capital letter lambda,U+039B ISOgrk3 -->
+			{ "Mu", "924" }, // greek capital letter mu, U+039C -->
+			{ "Nu", "925" }, // greek capital letter nu, U+039D -->
+			{ "Xi", "926" }, // greek capital letter xi, U+039E ISOgrk3 -->
+			{ "Omicron", "927" }, // greek capital letter omicron, U+039F -->
+			{ "Pi", "928" }, // greek capital letter pi, U+03A0 ISOgrk3 -->
+			{ "Rho", "929" }, // greek capital letter rho, U+03A1 -->
+			// <!-- there is no Sigmaf, and no U+03A2 character either -->
+			{ "Sigma", "931" }, // greek capital letter sigma,U+03A3 ISOgrk3 -->
+			{ "Tau", "932" }, // greek capital letter tau, U+03A4 -->
+			{ "Upsilon", "933" }, // greek capital letter upsilon,U+03A5 ISOgrk3 -->
+			{ "Phi", "934" }, // greek capital letter phi,U+03A6 ISOgrk3 -->
+			{ "Chi", "935" }, // greek capital letter chi, U+03A7 -->
+			{ "Psi", "936" }, // greek capital letter psi,U+03A8 ISOgrk3 -->
+			{ "Omega", "937" }, // greek capital letter omega,U+03A9 ISOgrk3 -->
+			{ "alpha", "945" }, // greek small letter alpha,U+03B1 ISOgrk3 -->
+			{ "beta", "946" }, // greek small letter beta, U+03B2 ISOgrk3 -->
+			{ "gamma", "947" }, // greek small letter gamma,U+03B3 ISOgrk3 -->
+			{ "delta", "948" }, // greek small letter delta,U+03B4 ISOgrk3 -->
+			{ "epsilon", "949" }, // greek small letter epsilon,U+03B5 ISOgrk3 -->
+			{ "zeta", "950" }, // greek small letter zeta, U+03B6 ISOgrk3 -->
+			{ "eta", "951" }, // greek small letter eta, U+03B7 ISOgrk3 -->
+			{ "theta", "952" }, // greek small letter theta,U+03B8 ISOgrk3 -->
+			{ "iota", "953" }, // greek small letter iota, U+03B9 ISOgrk3 -->
+			{ "kappa", "954" }, // greek small letter kappa,U+03BA ISOgrk3 -->
+			{ "lambda", "955" }, // greek small letter lambda,U+03BB ISOgrk3 -->
+			{ "mu", "956" }, // greek small letter mu, U+03BC ISOgrk3 -->
+			{ "nu", "957" }, // greek small letter nu, U+03BD ISOgrk3 -->
+			{ "xi", "958" }, // greek small letter xi, U+03BE ISOgrk3 -->
+			{ "omicron", "959" }, // greek small letter omicron, U+03BF NEW -->
+			{ "pi", "960" }, // greek small letter pi, U+03C0 ISOgrk3 -->
+			{ "rho", "961" }, // greek small letter rho, U+03C1 ISOgrk3 -->
+			{ "sigmaf", "962" }, // greek small letter final sigma,U+03C2 ISOgrk3 -->
+			{ "sigma", "963" }, // greek small letter sigma,U+03C3 ISOgrk3 -->
+			{ "tau", "964" }, // greek small letter tau, U+03C4 ISOgrk3 -->
+			{ "upsilon", "965" }, // greek small letter upsilon,U+03C5 ISOgrk3 -->
+			{ "phi", "966" }, // greek small letter phi, U+03C6 ISOgrk3 -->
+			{ "chi", "967" }, // greek small letter chi, U+03C7 ISOgrk3 -->
+			{ "psi", "968" }, // greek small letter psi, U+03C8 ISOgrk3 -->
+			{ "omega", "969" }, // greek small letter omega,U+03C9 ISOgrk3 -->
+			{ "thetasym", "977" }, // greek small letter theta symbol,U+03D1 NEW -->
+			{ "upsih", "978" }, // greek upsilon with hook symbol,U+03D2 NEW -->
+			{ "piv", "982" }, // greek pi symbol, U+03D6 ISOgrk3 -->
+			// <!-- General Punctuation -->
+			{ "bull", "8226" }, // bullet = black small circle,U+2022 ISOpub -->
+			// <!-- bullet is NOT the same as bullet operator, U+2219 -->
+			{ "hellip", "8230" }, // horizontal ellipsis = three dot leader,U+2026 ISOpub -->
+			{ "prime", "8242" }, // prime = minutes = feet, U+2032 ISOtech -->
+			{ "Prime", "8243" }, // double prime = seconds = inches,U+2033 ISOtech -->
+			{ "oline", "8254" }, // overline = spacing overscore,U+203E NEW -->
+			{ "frasl", "8260" }, // fraction slash, U+2044 NEW -->
+			// <!-- Letterlike Symbols -->
+			{ "weierp", "8472" }, // script capital P = power set= Weierstrass p, U+2118 ISOamso -->
+			{ "image", "8465" }, // blackletter capital I = imaginary part,U+2111 ISOamso -->
+			{ "real", "8476" }, // blackletter capital R = real part symbol,U+211C ISOamso -->
+			{ "trade", "8482" }, // trade mark sign, U+2122 ISOnum -->
+			{ "alefsym", "8501" }, // alef symbol = first transfinite cardinal,U+2135 NEW -->
+			// <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the
+			// same glyph could be used to depict both characters -->
+			// <!-- Arrows -->
+			{ "larr", "8592" }, // leftwards arrow, U+2190 ISOnum -->
+			{ "uarr", "8593" }, // upwards arrow, U+2191 ISOnum-->
+			{ "rarr", "8594" }, // rightwards arrow, U+2192 ISOnum -->
+			{ "darr", "8595" }, // downwards arrow, U+2193 ISOnum -->
+			{ "harr", "8596" }, // left right arrow, U+2194 ISOamsa -->
+			{ "crarr", "8629" }, // downwards arrow with corner leftwards= carriage return, U+21B5
+// NEW -->
+			{ "lArr", "8656" }, // leftwards double arrow, U+21D0 ISOtech -->
+			// <!-- ISO 10646 does not say that lArr is the same as the 'is implied by'
+			// arrow but also does not have any other character for that function.
+			// So ? lArr canbe used for 'is implied by' as ISOtech suggests -->
+			{ "uArr", "8657" }, // upwards double arrow, U+21D1 ISOamsa -->
+			{ "rArr", "8658" }, // rightwards double arrow,U+21D2 ISOtech -->
+			// <!-- ISO 10646 does not say this is the 'implies' character but does not
+			// have another character with this function so ?rArr can be used for
+			// 'implies' as ISOtech suggests -->
+			{ "dArr", "8659" }, // downwards double arrow, U+21D3 ISOamsa -->
+			{ "hArr", "8660" }, // left right double arrow,U+21D4 ISOamsa -->
+			// <!-- Mathematical Operators -->
+			{ "forall", "8704" }, // for all, U+2200 ISOtech -->
+			{ "part", "8706" }, // partial differential, U+2202 ISOtech -->
+			{ "exist", "8707" }, // there exists, U+2203 ISOtech -->
+			{ "empty", "8709" }, // empty set = null set = diameter,U+2205 ISOamso -->
+			{ "nabla", "8711" }, // nabla = backward difference,U+2207 ISOtech -->
+			{ "isin", "8712" }, // element of, U+2208 ISOtech -->
+			{ "notin", "8713" }, // not an element of, U+2209 ISOtech -->
+			{ "ni", "8715" }, // contains as member, U+220B ISOtech -->
+			// <!-- should there be a more memorable name than 'ni'? -->
+			{ "prod", "8719" }, // n-ary product = product sign,U+220F ISOamsb -->
+			// <!-- prod is NOT the same character as U+03A0 'greek capital letter pi'
+			// though the same glyph might be used for both -->
+			{ "sum", "8721" }, // n-ary summation, U+2211 ISOamsb -->
+			// <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
+			// though the same glyph might be used for both -->
+			{ "minus", "8722" }, // minus sign, U+2212 ISOtech -->
+			{ "lowast", "8727" }, // asterisk operator, U+2217 ISOtech -->
+			{ "radic", "8730" }, // square root = radical sign,U+221A ISOtech -->
+			{ "prop", "8733" }, // proportional to, U+221D ISOtech -->
+			{ "infin", "8734" }, // infinity, U+221E ISOtech -->
+			{ "ang", "8736" }, // angle, U+2220 ISOamso -->
+			{ "and", "8743" }, // logical and = wedge, U+2227 ISOtech -->
+			{ "or", "8744" }, // logical or = vee, U+2228 ISOtech -->
+			{ "cap", "8745" }, // intersection = cap, U+2229 ISOtech -->
+			{ "cup", "8746" }, // union = cup, U+222A ISOtech -->
+			{ "int", "8747" }, // integral, U+222B ISOtech -->
+			{ "there4", "8756" }, // therefore, U+2234 ISOtech -->
+			{ "sim", "8764" }, // tilde operator = varies with = similar to,U+223C ISOtech -->
+			// <!-- tilde operator is NOT the same character as the tilde, U+007E,although
+			// the same glyph might be used to represent both -->
+			{ "cong", "8773" }, // approximately equal to, U+2245 ISOtech -->
+			{ "asymp", "8776" }, // almost equal to = asymptotic to,U+2248 ISOamsr -->
+			{ "ne", "8800" }, // not equal to, U+2260 ISOtech -->
+			{ "equiv", "8801" }, // identical to, U+2261 ISOtech -->
+			{ "le", "8804" }, // less-than or equal to, U+2264 ISOtech -->
+			{ "ge", "8805" }, // greater-than or equal to,U+2265 ISOtech -->
+			{ "sub", "8834" }, // subset of, U+2282 ISOtech -->
+			{ "sup", "8835" }, // superset of, U+2283 ISOtech -->
+			// <!-- note that nsup, 'not a superset of, U+2283' is not covered by the
+			// Symbol font encoding and is not included. Should it be, for symmetry?
+			// It is in ISOamsn --> <!ENTITY nsub", "8836"},
+			// not a subset of, U+2284 ISOamsn -->
+			{ "sube", "8838" }, // subset of or equal to, U+2286 ISOtech -->
+			{ "supe", "8839" }, // superset of or equal to,U+2287 ISOtech -->
+			{ "oplus", "8853" }, // circled plus = direct sum,U+2295 ISOamsb -->
+			{ "otimes", "8855" }, // circled times = vector product,U+2297 ISOamsb -->
+			{ "perp", "8869" }, // up tack = orthogonal to = perpendicular,U+22A5 ISOtech -->
+			{ "sdot", "8901" }, // dot operator, U+22C5 ISOamsb -->
+			// <!-- dot operator is NOT the same character as U+00B7 middle dot -->
+			// <!-- Miscellaneous Technical -->
+			{ "lceil", "8968" }, // left ceiling = apl upstile,U+2308 ISOamsc -->
+			{ "rceil", "8969" }, // right ceiling, U+2309 ISOamsc -->
+			{ "lfloor", "8970" }, // left floor = apl downstile,U+230A ISOamsc -->
+			{ "rfloor", "8971" }, // right floor, U+230B ISOamsc -->
+			{ "lang", "9001" }, // left-pointing angle bracket = bra,U+2329 ISOtech -->
+			// <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single
+// left-pointing angle quotation
+			// mark' -->
+			{ "rang", "9002" }, // right-pointing angle bracket = ket,U+232A ISOtech -->
+			// <!-- rang is NOT the same character as U+003E 'greater than' or U+203A
+			// 'single right-pointing angle quotation mark' -->
+			// <!-- Geometric Shapes -->
+			{ "loz", "9674" }, // lozenge, U+25CA ISOpub -->
+			// <!-- Miscellaneous Symbols -->
+			{ "spades", "9824" }, // black spade suit, U+2660 ISOpub -->
+			// <!-- black here seems to mean filled as opposed to hollow -->
+			{ "clubs", "9827" }, // black club suit = shamrock,U+2663 ISOpub -->
+			{ "hearts", "9829" }, // black heart suit = valentine,U+2665 ISOpub -->
+			{ "diams", "9830" }, // black diamond suit, U+2666 ISOpub -->
+
+			// <!-- Latin Extended-A -->
+			{ "OElig", "338" }, // -- latin capital ligature OE,U+0152 ISOlat2 -->
+			{ "oelig", "339" }, // -- latin small ligature oe, U+0153 ISOlat2 -->
+			// <!-- ligature is a misnomer, this is a separate character in some languages -->
+			{ "Scaron", "352" }, // -- latin capital letter S with caron,U+0160 ISOlat2 -->
+			{ "scaron", "353" }, // -- latin small letter s with caron,U+0161 ISOlat2 -->
+			{ "Yuml", "376" }, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 -->
+			// <!-- Spacing Modifier Letters -->
+			{ "circ", "710" }, // -- modifier letter circumflex accent,U+02C6 ISOpub -->
+			{ "tilde", "732" }, // small tilde, U+02DC ISOdia -->
+			// <!-- General Punctuation -->
+			{ "ensp", "8194" }, // en space, U+2002 ISOpub -->
+			{ "emsp", "8195" }, // em space, U+2003 ISOpub -->
+			{ "thinsp", "8201" }, // thin space, U+2009 ISOpub -->
+			{ "zwnj", "8204" }, // zero width non-joiner,U+200C NEW RFC 2070 -->
+			{ "zwj", "8205" }, // zero width joiner, U+200D NEW RFC 2070 -->
+			{ "lrm", "8206" }, // left-to-right mark, U+200E NEW RFC 2070 -->
+			{ "rlm", "8207" }, // right-to-left mark, U+200F NEW RFC 2070 -->
+			{ "ndash", "8211" }, // en dash, U+2013 ISOpub -->
+			{ "mdash", "8212" }, // em dash, U+2014 ISOpub -->
+			{ "lsquo", "8216" }, // left single quotation mark,U+2018 ISOnum -->
+			{ "rsquo", "8217" }, // right single quotation mark,U+2019 ISOnum -->
+			{ "sbquo", "8218" }, // single low-9 quotation mark, U+201A NEW -->
+			{ "ldquo", "8220" }, // left double quotation mark,U+201C ISOnum -->
+			{ "rdquo", "8221" }, // right double quotation mark,U+201D ISOnum -->
+			{ "bdquo", "8222" }, // double low-9 quotation mark, U+201E NEW -->
+			{ "dagger", "8224" }, // dagger, U+2020 ISOpub -->
+			{ "Dagger", "8225" }, // double dagger, U+2021 ISOpub -->
+			{ "permil", "8240" }, // per mille sign, U+2030 ISOtech -->
+			{ "lsaquo", "8249" }, // single left-pointing angle quotation mark,U+2039 ISO proposed
+// -->
+			// <!-- lsaquo is proposed but not yet ISO standardized -->
+			{ "rsaquo", "8250" }, // single right-pointing angle quotation mark,U+203A ISO proposed
+// -->
+			// <!-- rsaquo is proposed but not yet ISO standardized -->
+			{ "euro", "8364" }, // -- euro sign, U+20AC NEW -->
+	};
+
+	/**
+	 * <p>
+	 * The set of entities supported by standard XML.
+	 * </p>
+	 */
+	public static final Entities XML;
+
+	/**
+	 * <p>
+	 * The set of entities supported by HTML 3.2.
+	 * </p>
+	 */
+	public static final Entities HTML32;
+
+	/**
+	 * <p>
+	 * The set of entities supported by HTML 4.0.
+	 * </p>
+	 */
+	public static final Entities HTML40;
+
+	static
+	{
+		Entities xml = new Entities();
+		xml.addEntities(BASIC_ARRAY);
+		xml.addEntities(APOS_ARRAY);
+		XML = xml;
+	}
+
+	static
+	{
+		Entities html32 = new Entities();
+		html32.addEntities(BASIC_ARRAY);
+		html32.addEntities(ISO8859_1_ARRAY);
+		HTML32 = html32;
+	}
+
+	static
+	{
+		Entities html40 = new Entities();
+		fillWithHtml40Entities(html40);
+		HTML40 = html40;
+	}
+
+	/**
+	 * <p>
+	 * Fills the specified entities instance with HTML 40 entities.
+	 * </p>
+	 * 
+	 * @param entities
+	 *            the instance to be filled.
+	 */
+	static void fillWithHtml40Entities(Entities entities)
+	{
+		entities.addEntities(BASIC_ARRAY);
+		entities.addEntities(ISO8859_1_ARRAY);
+		entities.addEntities(HTML40_ARRAY);
+	}
+
+	static interface EntityMap
+	{
+		/**
+		 * <p>
+		 * Add an entry to this entity map.
+		 * </p>
+		 * 
+		 * @param name
+		 *            the entity name
+		 * @param value
+		 *            the entity value
+		 */
+		void add(String name, int value);
+
+		/**
+		 * <p>
+		 * Returns the name of the entity identified by the specified value.
+		 * </p>
+		 * 
+		 * @param value
+		 *            the value to locate
+		 * @return entity name associated with the specified value
+		 */
+		String name(int value);
+
+		/**
+		 * <p>
+		 * Returns the value of the entity identified by the specified name.
+		 * </p>
+		 * 
+		 * @param name
+		 *            the name to locate
+		 * @return entity value associated with the specified name
+		 */
+		int value(String name);
+	}
+
+	static class PrimitiveEntityMap implements EntityMap
+	{
+		private final Map mapNameToValue = new HashMap();
+
+		private final IntHashMap mapValueToName = new IntHashMap();
+
+		/**
+		 * {@inheritDoc}
+		 */
+		// TODO not thread-safe as there is a window between changing the two maps
+		public void add(String name, int value)
+		{
+			mapNameToValue.put(name, new Integer(value));
+			mapValueToName.put(value, name);
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public String name(int value)
+		{
+			return (String)mapValueToName.get(value);
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public int value(String name)
+		{
+			Object value = mapNameToValue.get(name);
+			if (value == null)
+			{
+				return -1;
+			}
+			return ((Integer)value).intValue();
+		}
+	}
+
+	static abstract class MapIntMap implements Entities.EntityMap
+	{
+		protected final Map mapNameToValue;
+
+		protected final Map mapValueToName;
+
+		/**
+		 * Constructs a new instance backed by the two supplied maps. The maps are stored as
+		 * given; no copy is made.
+		 * 
+		 * @param nameToValue
+		 *            map used for entity name to entity value lookups
+		 * @param valueToName
+		 *            map used for entity value to entity name lookups
+		 */
+		MapIntMap(Map nameToValue, Map valueToName)
+		{
+			mapNameToValue = nameToValue;
+			mapValueToName = valueToName;
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public void add(String name, int value)
+		{
+			mapNameToValue.put(name, new Integer(value));
+			mapValueToName.put(new Integer(value), name);
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public String name(int value)
+		{
+			return (String)mapValueToName.get(new Integer(value));
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public int value(String name)
+		{
+			Object value = mapNameToValue.get(name);
+			if (value == null)
+			{
+				return -1;
+			}
+			return ((Integer)value).intValue();
+		}
+	}
+
+	static class HashEntityMap extends MapIntMap
+	{
+		/**
+		 * Constructs a new instance of <code>HashEntityMap</code>.
+		 */
+		public HashEntityMap()
+		{
+			super(new HashMap(), new HashMap());
+		}
+	}
+
+	static class TreeEntityMap extends MapIntMap
+	{
+		/**
+		 * Constructs a new instance of <code>TreeEntityMap</code>.
+		 */
+		public TreeEntityMap()
+		{
+			super(new TreeMap(), new TreeMap());
+		}
+	}
+
+	static class LookupEntityMap extends PrimitiveEntityMap
+	{
+		// TODO this class is not thread-safe
+		private String[] lookupTable;
+
+		private static final int LOOKUP_TABLE_SIZE = 256;
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public String name(int value)
+		{
+			if (value < LOOKUP_TABLE_SIZE)
+			{
+				return lookupTable()[value];
+			}
+			return super.name(value);
+		}
+
+		/**
+		 * <p>
+		 * Returns the lookup table for this entity map. The lookup table is created if it has not
+		 * been previously.
+		 * </p>
+		 * 
+		 * @return the lookup table
+		 */
+		private String[] lookupTable()
+		{
+			if (lookupTable == null)
+			{
+				createLookupTable();
+			}
+			return lookupTable;
+		}
+
+		/**
+		 * <p>
+		 * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity
+		 * names.
+		 * </p>
+		 */
+		private void createLookupTable()
+		{
+			lookupTable = new String[LOOKUP_TABLE_SIZE];
+			for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i)
+			{
+				lookupTable[i] = super.name(i);
+			}
+		}
+	}
+
+	static class ArrayEntityMap implements EntityMap
+	{
+		// TODO this class is not thread-safe
+		protected final int growBy;
+
+		protected int size = 0;
+
+		protected String[] names;
+
+		protected int[] values;
+
+		/**
+		 * Constructs a new instance of <code>ArrayEntityMap</code>.
+		 */
+		public ArrayEntityMap()
+		{
+			growBy = 100;
+			names = new String[growBy];
+			values = new int[growBy];
+		}
+
+		/**
+		 * Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the
+		 * array should grow.
+		 * 
+		 * @param growBy
+		 *            array will be initialized to and will grow by this amount
+		 */
+		public ArrayEntityMap(int growBy)
+		{
+			this.growBy = growBy;
+			names = new String[growBy];
+			values = new int[growBy];
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public void add(String name, int value)
+		{
+			ensureCapacity(size + 1);
+			names[size] = name;
+			values[size] = value;
+			size++;
+		}
+
+		/**
+		 * Verifies the capacity of the entity array, adjusting the size if necessary.
+		 * 
+		 * @param capacity
+		 *            size the array should be
+		 */
+		protected void ensureCapacity(int capacity)
+		{
+			if (capacity > names.length)
+			{
+				int newSize = Math.max(capacity, size + growBy);
+				String[] newNames = new String[newSize];
+				System.arraycopy(names, 0, newNames, 0, size);
+				names = newNames;
+				int[] newValues = new int[newSize];
+				System.arraycopy(values, 0, newValues, 0, size);
+				values = newValues;
+			}
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public String name(int value)
+		{
+			for (int i = 0; i < size; ++i)
+			{
+				if (values[i] == value)
+				{
+					return names[i];
+				}
+			}
+			return null;
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public int value(String name)
+		{
+			for (int i = 0; i < size; ++i)
+			{
+				if (names[i].equals(name))
+				{
+					return values[i];
+				}
+			}
+			return -1;
+		}
+	}
+
+	static class BinaryEntityMap extends ArrayEntityMap
+	{
+
+		// TODO - not thread-safe, because parent is not. Also references size.
+
+		/**
+		 * Constructs a new instance of <code>BinaryEntityMap</code>.
+		 */
+		public BinaryEntityMap()
+		{
+			super();
+		}
+
+		/**
+		 * Constructs a new instance of <code>BinaryEntityMap</code> specifying the size by which
+		 * the underlying array should grow.
+		 * 
+		 * @param growBy
+		 *            array will be initialized to and will grow by this amount
+		 */
+		public BinaryEntityMap(int growBy)
+		{
+			super(growBy);
+		}
+
+		/**
+		 * Performs a binary search of the entity array for the specified key. This method is based
+		 * on code in {@link java.util.Arrays}.
+		 * 
+		 * @param key
+		 *            the key to be found
+		 * @return the index of the entity array matching the specified key
+		 */
+		private int binarySearch(int key)
+		{
+			int low = 0;
+			int high = size - 1;
+
+			while (low <= high)
+			{
+				int mid = (low + high) >>> 1;
+				int midVal = values[mid];
+
+				if (midVal < key)
+				{
+					low = mid + 1;
+				}
+				else if (midVal > key)
+				{
+					high = mid - 1;
+				}
+				else
+				{
+					return mid; // key found
+				}
+			}
+			return -(low + 1); // key not found.
+		}
+
+		/**
+		 * {@inheritDoc}
+		 * <p>
+		 * Entries are kept sorted by value so that lookups can use a binary search. A value that
+		 * is already present is ignored, which means the same value cannot be inserted twice.
+		 * </p>
+		 */
+		public void add(String name, int value)
+		{
+			int insertAt = binarySearch(value);
+			if (insertAt >= 0)
+			{
+				// value already present; index 0 is a valid hit and must be treated as one
+				return;
+			}
+			ensureCapacity(size + 1);
+			insertAt = -(insertAt + 1); // binarySearch returns it negative and off-by-one
+			// shift the tail one slot to the right to open a gap at the insertion point
+			System.arraycopy(values, insertAt, values, insertAt + 1, size - insertAt);
+			values[insertAt] = value;
+			System.arraycopy(names, insertAt, names, insertAt + 1, size - insertAt);
+			names[insertAt] = name;
+			size++;
+		}
+
+		/**
+		 * {@inheritDoc}
+		 */
+		public String name(int value)
+		{
+			int index = binarySearch(value);
+			if (index < 0)
+			{
+				return null;
+			}
+			return names[index];
+		}
+	}
+
+	private final EntityMap map;
+
+	/**
+	 * Default constructor.
+	 */
+	public Entities()
+	{
+		map = new Entities.LookupEntityMap();
+	}
+
+	/**
+	 * package scoped constructor for testing.
+	 * 
+	 * @param emap
+	 *            entity map.
+	 */
+	Entities(EntityMap emap)
+	{
+		map = emap;
+	}
+
+	/**
+	 * <p>
+	 * Adds entities to this entity.
+	 * </p>
+	 * 
+	 * @param entityArray
+	 *            array of entities to be added
+	 */
+	public void addEntities(String[][] entityArray)
+	{
+		for (int i = 0; i < entityArray.length; ++i)
+		{
+			addEntity(entityArray[i][0], Integer.parseInt(entityArray[i][1]));
+		}
+	}
+
+	/**
+	 * <p>
+	 * Adds a single entity to this instance by registering it with the underlying entity map.
+	 * </p>
+	 * 
+	 * @param name
+	 *            name of the entity
+	 * @param value
+	 *            value of the entity
+	 */
+	public void addEntity(String name, int value)
+	{
+		map.add(name, value);
+	}
+
+	/**
+	 * <p>
+	 * Returns the name of the entity identified by the specified value.
+	 * </p>
+	 * 
+	 * @param value
+	 *            the value to locate
+	 * @return entity name associated with the specified value
+	 */
+	public String entityName(int value)
+	{
+		return map.name(value);
+	}
+
+	/**
+	 * <p>
+	 * Returns the value of the entity identified by the specified name.
+	 * </p>
+	 * 
+	 * @param name
+	 *            the name to locate
+	 * @return entity value associated with the specified name
+	 */
+	public int entityValue(String name)
+	{
+		return map.value(name);
+	}
+
+	/**
+	 * <p>
+	 * Escapes the characters in a <code>String</code>.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example, if you have called addEntity(&quot;foo&quot;, 0xA1), escape(&quot;\u00A1&quot;)
+	 * will return &quot;&amp;foo;&quot;
+	 * </p>
+	 * 
+	 * @param str
+	 *            The <code>String</code> to escape. Must not be null, because its length is used
+	 *            to size the internal buffer.
+	 * @return A new escaped <code>String</code>.
+	 */
+	public String escape(String str)
+	{
+		StringWriter stringWriter = createStringWriter(str);
+		try
+		{
+			this.escape(stringWriter, str);
+		}
+		catch (IOException e)
+		{
+			// This should never happen because ALL the StringWriter methods called by
+			// #escape(Writer, String) do not throw IOExceptions.
+			throw new RuntimeException(e);
+		}
+		return stringWriter.toString();
+	}
+
+	/**
+	 * <p>
+	 * Escapes the characters in the <code>String</code> passed and writes the result to the
+	 * <code>Writer</code> passed.
+	 * </p>
+	 * 
+	 * @param writer
+	 *            The <code>Writer</code> to write the results of the escaping to. Assumed to be a
+	 *            non-null value.
+	 * @param str
+	 *            The <code>String</code> to escape. Assumed to be a non-null value.
+	 * @throws IOException
+	 *             when <code>Writer</code> passed throws the exception from calls to the
+	 *             {@link Writer#write(int)} methods.
+	 * 
+	 * @see #escape(String)
+	 * @see Writer
+	 */
+	public void escape(Writer writer, String str) throws IOException
+	{
+		int len = str.length();
+		for (int i = 0; i < len; i++)
+		{
+			char c = str.charAt(i);
+			String entityName = entityName(c);
+			if (entityName == null)
+			{
+				if (c > 0x7F)
+				{
+					writer.write("&#");
+					writer.write(Integer.toString(c, 10));
+					writer.write(';');
+				}
+				else
+				{
+					writer.write(c);
+				}
+			}
+			else
+			{
+				writer.write('&');
+				writer.write(entityName);
+				writer.write(';');
+			}
+		}
+	}
+
+	/**
+	 * <p>
+	 * Unescapes the entities in a <code>String</code>.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
+	 * unescape(&quot;&amp;foo;&quot;) will return &quot;\u00A1&quot;
+	 * </p>
+	 * 
+	 * @param str
+	 *            The <code>String</code> to unescape. Must not be null.
+	 * @return The unescaped <code>String</code>. The input instance itself is returned when it
+	 *         contains no ampersand.
+	 */
+	public String unescape(String str)
+	{
+		int firstAmp = str.indexOf('&');
+		if (firstAmp < 0)
+		{
+			// without an ampersand there is nothing to unescape
+			return str;
+		}
+		else
+		{
+			StringWriter stringWriter = createStringWriter(str);
+			try
+			{
+				doUnescape(stringWriter, str, firstAmp);
+			}
+			catch (IOException e)
+			{
+				// This should never happen because ALL the StringWriter methods called by
+				// #doUnescape(Writer, String, int) do not throw IOExceptions.
+				throw new RuntimeException(e);
+			}
+			return stringWriter.toString();
+		}
+	}
+
+	/**
+	 * Make the StringWriter 10% larger than the source String to avoid growing the writer
+	 * 
+	 * @param str
+	 *            The source string
+	 * @return A newly created StringWriter
+	 */
+	private StringWriter createStringWriter(String str)
+	{
+		return new StringWriter((int)(str.length() + (str.length() * 0.1)));
+	}
+
+	/**
+	 * <p>
+	 * Writes an unescaped copy of <code>str</code> to <code>writer</code>. Input that contains
+	 * no ampersand is written through unchanged; otherwise the entity handling is delegated to
+	 * the internal unescape routine, starting at the first ampersand.
+	 * </p>
+	 * 
+	 * @param writer
+	 *            The <code>Writer</code> that receives the result; assumed to be non-null.
+	 * @param str
+	 *            The source <code>String</code> to unescape; assumed to be non-null.
+	 * @throws IOException
+	 *             if the supplied <code>Writer</code> fails while being written to.
+	 * 
+	 * @see #unescape(String)
+	 * @see Writer
+	 */
+	public void unescape(Writer writer, String str) throws IOException
+	{
+		final int ampersandPos = str.indexOf('&');
+		if (ampersandPos >= 0)
+		{
+			doUnescape(writer, str, ampersandPos);
+		}
+		else
+		{
+			writer.write(str);
+		}
+	}
+
+	/**
+	 * Underlying unescape method that allows the optimisation of not starting from the 0 index
+	 * again.
+	 * <p>
+	 * Everything before <code>firstAmp</code> is copied verbatim. From there on, each
+	 * <code>&amp;...;</code> sequence is resolved either as a numeric character reference
+	 * (decimal <code>&amp;#nnn;</code> or hexadecimal <code>&amp;#xhhh;</code>) or as a named
+	 * entity known to this instance. Sequences that cannot be resolved are written unchanged.
+	 * Numeric references above 0xFFFF are written as a surrogate pair.
+	 * </p>
+	 * 
+	 * @param writer
+	 *            The <code>Writer</code> to write the results to; assumed to be non-null.
+	 * @param str
+	 *            The source <code>String</code> to unescape; assumed to be non-null.
+	 * @param firstAmp
+	 *            The <code>int</code> index of the first ampersand in the source String.
+	 * @throws IOException
+	 *             when <code>Writer</code> passed throws the exception from calls to the
+	 *             {@link Writer#write(int)} methods.
+	 */
+	private void doUnescape(Writer writer, String str, int firstAmp) throws IOException
+	{
+		writer.write(str, 0, firstAmp);
+		int len = str.length();
+		for (int i = firstAmp; i < len; i++)
+		{
+			char c = str.charAt(i);
+			if (c != '&')
+			{
+				writer.write(c);
+				continue;
+			}
+			int nextIdx = i + 1;
+			int semiColonIdx = str.indexOf(';', nextIdx);
+			if (semiColonIdx == -1)
+			{
+				// no terminating semi-colon anywhere: a plain ampersand
+				writer.write(c);
+				continue;
+			}
+			int amphersandIdx = str.indexOf('&', nextIdx);
+			if (amphersandIdx != -1 && amphersandIdx < semiColonIdx)
+			{
+				// Then the text looks like &...&...;
+				writer.write(c);
+				continue;
+			}
+			String entityContent = str.substring(nextIdx, semiColonIdx);
+			int entityValue = -1;
+			if (entityContent.length() > 0)
+			{
+				if (entityContent.charAt(0) == '#')
+				{
+					// escaped value content is an integer (decimal or hexadecimal)
+					entityValue = parseNumericEntity(entityContent);
+				}
+				else
+				{
+					// escaped value content is an entity name
+					entityValue = entityValue(entityContent);
+				}
+			}
+
+			if (entityValue == -1)
+			{
+				// unknown or malformed entity: leave it exactly as it was
+				writer.write('&');
+				writer.write(entityContent);
+				writer.write(';');
+			}
+			else if (Character.isSupplementaryCodePoint(entityValue))
+			{
+				// code points above 0xFFFF need two chars (a surrogate pair)
+				writer.write(Character.toChars(entityValue));
+			}
+			else
+			{
+				writer.write(entityValue);
+			}
+			i = semiColonIdx; // move index up to the semi-colon
+		}
+	}
+
+	/**
+	 * Parses the content of a numeric character reference, i.e. the text between
+	 * <code>&amp;</code> and <code>;</code> that starts with <code>#</code>.
+	 * 
+	 * @param entityContent
+	 *            the reference content including the leading <code>#</code>
+	 * @return the referenced code point, or -1 if the content is not a number or lies outside
+	 *         the range 0 to {@link Character#MAX_CODE_POINT}
+	 */
+	private static int parseNumericEntity(String entityContent)
+	{
+		if (entityContent.length() < 2)
+		{
+			// a lone '#'
+			return -1;
+		}
+		char radixMarker = entityContent.charAt(1);
+		boolean hex = (radixMarker == 'x') || (radixMarker == 'X');
+		try
+		{
+			int value;
+			if (hex)
+			{
+				value = Integer.parseInt(entityContent.substring(2), 16);
+			}
+			else
+			{
+				value = Integer.parseInt(entityContent.substring(1), 10);
+			}
+			// Integer.parseInt accepts a sign, so reject negatives explicitly
+			if (value < 0 || value > Character.MAX_CODE_POINT)
+			{
+				return -1;
+			}
+			return value;
+		}
+		catch (NumberFormatException e)
+		{
+			return -1;
+		}
+	}
+
+}

Propchange: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java
------------------------------------------------------------------------------
    svn:executable = *

Added: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java
URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java?rev=1099592&view=auto
==============================================================================
--- wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java (added)
+++ wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java Wed May  4 20:24:59 2011
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.wicket.util.string;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.Locale;
+
+/**
+ * <p>
+ * Escapes and unescapes <code>String</code>s for HTML and XML.
+ * </p>
+ * 
+ * <p>
+ * #ThreadSafe#
+ * </p>
+ * 
+ * @author Apache Software Foundation
+ * @author Apache Jakarta Turbine
+ * @author Purple Technology
+ * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
+ * @author Antony Riley
+ * @author Helge Tesgaard
+ * @author <a href="sean@boohai.com">Sean Brown</a>
+ * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
+ * @author Phil Steitz
+ * @author Pete Gieser
+ * @since 2.0
+ * @version $Id$
+ */
+// Copy from commons-lang ver. 2.6. Non-html/xml methods were removed
+class StringEscapeUtils
+{
+
+	/**
+	 * <p>
+	 * <code>StringEscapeUtils</code> instances should NOT be constructed in standard programming.
+	 * </p>
+	 * 
+	 * <p>
+	 * Instead, the class should be used through its static methods, for example:
+	 * 
+	 * <pre>
+	 * StringEscapeUtils.escapeHtml(&quot;foo&quot;);
+	 * </pre>
+	 * 
+	 * </p>
+	 * 
+	 * <p>
+	 * This constructor is public to permit tools that require a JavaBean instance to operate.
+	 * </p>
+	 */
+	public StringEscapeUtils()
+	{
+		super();
+	}
+
+	/**
+	 * <p>
+	 * Returns an upper case hexadecimal <code>String</code> for the given character.
+	 * </p>
+	 * 
+	 * @param ch
+	 *            The character to convert.
+	 * @return An upper case hexadecimal <code>String</code>
+	 */
+	private static String hex(char ch)
+	{
+		return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
+	}
+
+	// HTML and XML
+	// --------------------------------------------------------------------------
+	/**
+	 * <p>
+	 * Escapes the characters in a <code>String</code> using HTML entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example:
+	 * </p>
+	 * <p>
+	 * <code>"bread" & "butter"</code>
+	 * </p>
+	 * becomes:
+	 * <p>
+	 * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
+	 * </p>
+	 * 
+	 * <p>
+	 * Supports all known HTML 4.0 entities, including funky accents. Note that the commonly used
+	 * apostrophe escape character (&amp;apos;) is not a legal entity and so is not supported).
+	 * </p>
+	 * 
+	 * @param str
+	 *            the <code>String</code> to escape, may be null
+	 * @return a new escaped <code>String</code>, <code>null</code> if null string input
+	 * 
+	 * @see #unescapeHtml(String)
+	 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO
+	 *      Entities</a>
+	 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
+	 *      Latin-1</a>
+	 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
+	 *      references</a>
+	 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character
+	 *      References</a>
+	 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
+	 *      positions</a>
+	 */
+	public static String escapeHtml(String str)
+	{
+		if (str == null)
+		{
+			return null;
+		}
+		try
+		{
+			StringWriter writer = new StringWriter((int)(str.length() * 1.5));
+			escapeHtml(writer, str);
+			return writer.toString();
+		}
+		catch (IOException ioe)
+		{
+			// should be impossible
+			throw new RuntimeException(ioe);
+		}
+	}
+
+	/**
+	 * <p>
+	 * Escapes the characters in a <code>String</code> using HTML entities and writes them to a
+	 * <code>Writer</code>.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example:
+	 * </p>
+	 * <code>"bread" & "butter"</code>
+	 * <p>
+	 * becomes:
+	 * </p>
+	 * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
+	 * 
+	 * <p>
+	 * Supports all known HTML 4.0 entities, including funky accents. Note that the commonly used
+	 * apostrophe escape character (&amp;apos;) is not a legal entity and so is not supported).
+	 * </p>
+	 * 
+	 * @param writer
+	 *            the writer receiving the escaped string, not null
+	 * @param string
+	 *            the <code>String</code> to escape, may be null
+	 * @throws IllegalArgumentException
+	 *             if the writer is null
+	 * @throws IOException
+	 *             when <code>Writer</code> passed throws the exception from calls to the
+	 *             {@link Writer#write(int)} methods.
+	 * 
+	 * @see #escapeHtml(String)
+	 * @see #unescapeHtml(String)
+	 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO
+	 *      Entities</a>
+	 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
+	 *      Latin-1</a>
+	 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
+	 *      references</a>
+	 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character
+	 *      References</a>
+	 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
+	 *      positions</a>
+	 */
+	public static void escapeHtml(Writer writer, String string) throws IOException
+	{
+		if (writer == null)
+		{
+			throw new IllegalArgumentException("The Writer must not be null.");
+		}
+		if (string == null)
+		{
+			return;
+		}
+		Entities.HTML40.escape(writer, string);
+	}
+
+	// -----------------------------------------------------------------------
+	/**
+	 * <p>
+	 * Unescapes a string containing entity escapes to a string containing the actual Unicode
+	 * characters corresponding to the escapes. Supports HTML 4.0 entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" will become
+	 * "&lt;Fran&ccedil;ais&gt;"
+	 * </p>
+	 * 
+	 * <p>
+	 * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string.
+	 * e.g. "&amp;gt;&amp;zzzz;x" will become "&gt;&amp;zzzz;x".
+	 * </p>
+	 * 
+	 * @param str
+	 *            the <code>String</code> to unescape, may be null
+	 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
+	 * @see #escapeHtml(Writer, String)
+	 */
+	public static String unescapeHtml(String str)
+	{
+		if (str == null)
+		{
+			return null;
+		}
+		try
+		{
+			StringWriter writer = new StringWriter((int)(str.length() * 1.5));
+			unescapeHtml(writer, str);
+			return writer.toString();
+		}
+		catch (IOException ioe)
+		{
+			// should be impossible
+			throw new RuntimeException(ioe);
+		}
+	}
+
+	/**
+	 * <p>
+	 * Unescapes a string containing entity escapes to a string containing the actual Unicode
+	 * characters corresponding to the escapes. Supports HTML 4.0 entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" will become
+	 * "&lt;Fran&ccedil;ais&gt;"
+	 * </p>
+	 * 
+	 * <p>
+	 * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string.
+	 * e.g. "&amp;gt;&amp;zzzz;x" will become "&gt;&amp;zzzz;x".
+	 * </p>
+	 * 
+	 * @param writer
+	 *            the writer receiving the unescaped string, not null
+	 * @param string
+	 *            the <code>String</code> to unescape, may be null
+	 * @throws IllegalArgumentException
+	 *             if the writer is null
+	 * @throws IOException
+	 *             if an IOException occurs
+	 * @see #escapeHtml(String)
+	 */
+	public static void unescapeHtml(Writer writer, String string) throws IOException
+	{
+		if (writer == null)
+		{
+			throw new IllegalArgumentException("The Writer must not be null.");
+		}
+		if (string == null)
+		{
+			return;
+		}
+		Entities.HTML40.unescape(writer, string);
+	}
+
+	// -----------------------------------------------------------------------
+	/**
+	 * <p>
+	 * Escapes the characters in a <code>String</code> using XML entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example: <tt>"bread" & "butter"</tt> =>
+	 * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
+	 * </p>
+	 * 
+	 * <p>
+	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
+	 * external entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * Note that unicode characters greater than 0x7f are currently escaped to their numerical \\u
+	 * equivalent. This may change in future releases.
+	 * </p>
+	 * 
+	 * @param writer
+	 *            the writer receiving the unescaped string, not null
+	 * @param str
+	 *            the <code>String</code> to escape, may be null
+	 * @throws IllegalArgumentException
+	 *             if the writer is null
+	 * @throws IOException
+	 *             if there is a problem writing
+	 * @see #unescapeXml(java.lang.String)
+	 */
+	public static void escapeXml(Writer writer, String str) throws IOException
+	{
+		if (writer == null)
+		{
+			throw new IllegalArgumentException("The Writer must not be null.");
+		}
+		if (str == null)
+		{
+			return;
+		}
+		Entities.XML.escape(writer, str);
+	}
+
+	/**
+	 * <p>
+	 * Escapes the characters in a <code>String</code> using XML entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * For example: <tt>"bread" & "butter"</tt> =>
+	 * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
+	 * </p>
+	 * 
+	 * <p>
+	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
+	 * external entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * Note that unicode characters greater than 0x7f are currently escaped to their numerical \\u
+	 * equivalent. This may change in future releases.
+	 * </p>
+	 * 
+	 * @param str
+	 *            the <code>String</code> to escape, may be null
+	 * @return a new escaped <code>String</code>, <code>null</code> if null string input
+	 * @see #unescapeXml(java.lang.String)
+	 */
+	public static String escapeXml(String str)
+	{
+		if (str == null)
+		{
+			return null;
+		}
+		return Entities.XML.escape(str);
+	}
+
+	// -----------------------------------------------------------------------
+	/**
+	 * <p>
+	 * Unescapes a string containing XML entity escapes to a string containing the actual Unicode
+	 * characters corresponding to the escapes.
+	 * </p>
+	 * 
+	 * <p>
+	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
+	 * external entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * Note that numerical \\u unicode codes are unescaped to their respective unicode characters.
+	 * This may change in future releases.
+	 * </p>
+	 * 
+	 * @param writer
+	 *            the writer receiving the unescaped string, not null
+	 * @param str
+	 *            the <code>String</code> to unescape, may be null
+	 * @throws IllegalArgumentException
+	 *             if the writer is null
+	 * @throws IOException
+	 *             if there is a problem writing
+	 * @see #escapeXml(String)
+	 */
+	public static void unescapeXml(Writer writer, String str) throws IOException
+	{
+		if (writer == null)
+		{
+			throw new IllegalArgumentException("The Writer must not be null.");
+		}
+		if (str == null)
+		{
+			return;
+		}
+		Entities.XML.unescape(writer, str);
+	}
+
+	/**
+	 * <p>
+	 * Unescapes a string containing XML entity escapes to a string containing the actual Unicode
+	 * characters corresponding to the escapes.
+	 * </p>
+	 * 
+	 * <p>
+	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
+	 * external entities.
+	 * </p>
+	 * 
+	 * <p>
+	 * Note that numerical \\u unicode codes are unescaped to their respective unicode characters.
+	 * This may change in future releases.
+	 * </p>
+	 * 
+	 * @param str
+	 *            the <code>String</code> to unescape, may be null
+	 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
+	 * @see #escapeXml(String)
+	 */
+	public static String unescapeXml(String str)
+	{
+		if (str == null)
+		{
+			return null;
+		}
+		return Entities.XML.unescape(str);
+	}
+}

Propchange: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java
URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java?rev=1099592&r1=1099591&r2=1099592&view=diff
==============================================================================
--- wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java (original)
+++ wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java Wed May  4 20:24:59 2011
@@ -375,6 +375,19 @@ public final class Strings
 	}
 
 	/**
+	 * Unescapes the escaped entities in the <code>markup</code> passed.
+	 * 
+	 * @param markup
+	 *            The source <code>String</code> to unescape.
+	 * @return the unescaped markup or <code>null</code> if the input is <code>null</code>
+	 */
+	public static CharSequence unescapeMarkup(final String markup)
+	{
+		String unescapedMarkup = StringEscapeUtils.unescapeHtml(markup);
+		return unescapedMarkup;
+	}
+
+	/**
 	 * Gets the first path component of a path using a given separator. If the separator cannot be
 	 * found, the path itself is returned.
 	 * <p>

Re: svn commit: r1099592 - in /wicket/trunk: wicket-core/src/main/java/org/apache/wicket/markup/parser/ wicket-core/src/test/java/org/apache/wicket/markup/ wicket-util/src/main/java/org/apache/wicket/util/string/

Posted by Martin Grigorov <mg...@apache.org>.

Hi Juergen,

Can you review the part of this changelist related to XmlTag and
TagAttributes when you have time ?
Everything seems to work OK so far.
The only problem is that now TagAttributes logs this warning:
WARNING: Please use component.setMarkupId(String) to change the tag's
'id' attribute.

The stack is:
Thread [qtp513694835-15] (Suspended (breakpoint at line 71 in TagAttributes))	
	TagAttributes.checkIdAttribute(String) line: 71	
	TagAttributes.put(String, Object) line: 60	
	TagAttributes.put(Object, Object) line: 30	
	XmlTag.put(String, CharSequence) line: 405	
	ComponentTag.put(String, CharSequence) line: 481	
	Label(Component).onComponentTag(ComponentTag) line: 3835	
	Label.onComponentTag(ComponentTag) line: 122	
	Label(Component).internalRenderComponent() line: 2534	
	Label(WebComponent).onRender() line: 56	
	Label(Component).internalRender() line: 2394	
	Label(Component).render() line: 2322	
....


On Wed, May 4, 2011 at 10:24 PM,  <mg...@apache.org> wrote:
> Author: mgrigorov
> Date: Wed May  4 20:24:59 2011
> New Revision: 1099592
>
> URL: http://svn.apache.org/viewvc?rev=1099592&view=rev
> Log:
> WICKET-3608 input button escapes escaped value
>
> Unescape html entities while reading them. ComponentTag.writeOutput() will encode them when writing.
> Uses StringEscapeUtils (only html/xml related methods left) and Entities (non-modified) from commons-lang 2.6.
>
>
> Added:
>    wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java
>    wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java   (with props)
>    wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java   (with props)
> Modified:
>    wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java
>    wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java
>    wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java
>
> Modified: wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java
> URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java?rev=1099592&r1=1099591&r2=1099592&view=diff
> ==============================================================================
> --- wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java (original)
> +++ wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/TagAttributes.java Wed May  4 20:24:59 2011
> @@ -18,6 +18,7 @@ package org.apache.wicket.markup.parser;
>
>  import java.util.Map;
>
> +import org.apache.wicket.util.string.Strings;
>  import org.apache.wicket.util.value.IValueMap;
>  import org.apache.wicket.util.value.ValueMap;
>  import org.slf4j.Logger;
> @@ -57,7 +58,7 @@ public class TagAttributes extends Value
>        public final Object put(String key, Object value)
>        {
>                checkIdAttribute(key);
> -               return super.put(key, value);
> +               return putInternal(key, value);
>        }
>
>        /**
> @@ -81,7 +82,7 @@ public class TagAttributes extends Value
>         */
>        public final Object putInternal(String key, Object value)
>        {
> -               return super.put(key, value);
> +               return super.put(key, unescapeHtml(value));
>        }
>
>        @Override
> @@ -95,4 +96,24 @@ public class TagAttributes extends Value
>
>                super.putAll(map);
>        }
> +
> +       /**
> +        * Unescapes the HTML entities from the <code>value</code> if it is a {@link CharSequence} and
> +        * there are any
> +        *
> +        * @param value
> +        *            the attribute value
> +        * @return the HTML unescaped value or the non-modified input
> +        */
> +       private static final Object unescapeHtml(Object value)
> +       {
> +               if (value instanceof CharSequence)
> +               {
> +                       return Strings.unescapeMarkup(value.toString());
> +               }
> +               else
> +               {
> +                       return value;
> +               }
> +       }
>  }
> \ No newline at end of file
>
> Modified: wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java
> URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java?rev=1099592&r1=1099591&r2=1099592&view=diff
> ==============================================================================
> --- wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java (original)
> +++ wicket/trunk/wicket-core/src/main/java/org/apache/wicket/markup/parser/XmlTag.java Wed May  4 20:24:59 2011
> @@ -25,7 +25,6 @@ import org.apache.wicket.util.string.App
>  import org.apache.wicket.util.string.StringValue;
>  import org.apache.wicket.util.string.Strings;
>  import org.apache.wicket.util.value.IValueMap;
> -import org.apache.wicket.util.value.ValueMap;
>  import org.slf4j.Logger;
>  import org.slf4j.LoggerFactory;
>
> @@ -354,7 +353,7 @@ public class XmlTag
>                dest.copyOf = copyOf;
>                if (attributes != null)
>                {
> -                       dest.attributes = new ValueMap(attributes);
> +                       dest.attributes = new TagAttributes(attributes);
>                }
>        }
>
>
> Added: wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java
> URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java?rev=1099592&view=auto
> ==============================================================================
> --- wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java (added)
> +++ wicket/trunk/wicket-core/src/test/java/org/apache/wicket/markup/ComponentTagAttributeEscapingTest.java Wed May  4 20:24:59 2011
> @@ -0,0 +1,98 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.wicket.markup;
> +
> +import junit.framework.TestCase;
> +
> +import org.apache.wicket.MarkupContainer;
> +import org.apache.wicket.markup.html.WebPage;
> +import org.apache.wicket.markup.html.form.Button;
> +import org.apache.wicket.markup.html.link.Link;
> +import org.apache.wicket.util.resource.IResourceStream;
> +import org.apache.wicket.util.resource.StringResourceStream;
> +import org.apache.wicket.util.tester.WicketTester;
> +
> +/**
> + * @author Pedro Santos
> + */
> +public class ComponentTagAttributeEscapingTest extends TestCase
> +{
> +
> +       /**
> +        * @throws Exception
> +        */
> +       public void testComponentAttributesNotDoubleEscaped() throws Exception
> +       {
> +               WicketTester tester = new WicketTester();
> +               tester.startPage(ButtonValuePage.class);
> +               String response = tester.getLastResponseAsString();
> +               System.out.println(response);
> +               assertTrue("One of the pound entity representations is missing: &pound; or &#163;",
> +                       response.contains("££"));
> +               assertTrue("must not be double escaped", response.contains("Watch escaped value: &gt;&gt;"));
> +               assertTrue("following the last assert logic, this one would true",
> +                       response.contains("alerting: &amp;"));
> +               assertTrue("not double escape manually add attributes",
> +                       response.contains("some_attribute=\"&amp;\""));
> +       }
> +
> +       /**
> +        * Just two distinct components with escaped characters in markup attribute.
> +        * */
> +       public static class ButtonValuePage extends WebPage implements IMarkupResourceStreamProvider
> +       {
> +               /** */
> +               private static final long serialVersionUID = 1L;
> +
> +               /**
> +                * Construct.
> +                */
> +               public ButtonValuePage()
> +               {
> +                       add(new Button("button"));
> +                       add(new Link<Void>("link")
> +                       {
> +                               /** */
> +                               private static final long serialVersionUID = 1L;
> +
> +                               @Override
> +                               public void onClick()
> +                               {
> +                               }
> +
> +                               @Override
> +                               protected void onComponentTag(ComponentTag tag)
> +                               {
> +                                       super.onComponentTag(tag);
> +                                       tag.put("some_attribute", "&amp;");
> +                               }
> +                       });
> +               }
> +
> +               public IResourceStream getMarkupResourceStream(MarkupContainer container,
> +                       Class<?> containerClass)
> +               {
> +                       return new StringResourceStream(
> +                               "<html>"//
> +                                       + "<body>"//
> +                                       + "<a wicket:id=\"link\" onclick=\"alert('alerting: &amp; &pound;&#163; ')\">link</a>"//
> +                                       + "<input type=\"submit\" wicket:id=\"button\" value=\"Watch escaped value: &gt;&gt;\"/>"//
> +                                       + "</body>" + //
> +                                       "</html>");
> +               }
> +       }
> +}
> \ No newline at end of file
>
> Added: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java
> URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java?rev=1099592&view=auto
> ==============================================================================
> --- wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java (added)
> +++ wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java Wed May  4 20:24:59 2011
> @@ -0,0 +1,1151 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.wicket.util.string;
> +
> +import java.io.IOException;
> +import java.io.StringWriter;
> +import java.io.Writer;
> +import java.util.HashMap;
> +import java.util.Map;
> +import java.util.TreeMap;
> +
> +import org.apache.wicket.util.collections.IntHashMap;
> +
> +/**
> + * <p>
> + * Provides HTML and XML entity utilities.
> + * </p>
> + *
> + * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
> + * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
> + *      Latin-1</a>
> + * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
> + *      references</a>
> + * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
> + * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
> + *      positions</a>
> + *
> + * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
> + * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
> + * @since 2.0
> + * @version $Id$
> + */
> +// Copy from commons-lang ver. 2.6. Non-modified.
> +class Entities
> +{
> +
> +       private static final String[][] BASIC_ARRAY = { { "quot", "34" }, // " - double-quote
> +                       { "amp", "38" }, // & - ampersand
> +                       { "lt", "60" }, // < - less-than
> +                       { "gt", "62" }, // > - greater-than
> +       };
> +
> +       private static final String[][] APOS_ARRAY = { { "apos", "39" }, // XML apostrophe
> +       };
> +
> +       // package scoped for testing
> +       static final String[][] ISO8859_1_ARRAY = { { "nbsp", "160" }, // non-breaking space
> +                       { "iexcl", "161" }, // inverted exclamation mark
> +                       { "cent", "162" }, // cent sign
> +                       { "pound", "163" }, // pound sign
> +                       { "curren", "164" }, // currency sign
> +                       { "yen", "165" }, // yen sign = yuan sign
> +                       { "brvbar", "166" }, // broken bar = broken vertical bar
> +                       { "sect", "167" }, // section sign
> +                       { "uml", "168" }, // diaeresis = spacing diaeresis
> +                       { "copy", "169" }, // © - copyright sign
> +                       { "ordf", "170" }, // feminine ordinal indicator
> +                       { "laquo", "171" }, // left-pointing double angle quotation mark = left pointing
> +// guillemet
> +                       { "not", "172" }, // not sign
> +                       { "shy", "173" }, // soft hyphen = discretionary hyphen
> +                       { "reg", "174" }, // ® - registered trademark sign
> +                       { "macr", "175" }, // macron = spacing macron = overline = APL overbar
> +                       { "deg", "176" }, // degree sign
> +                       { "plusmn", "177" }, // plus-minus sign = plus-or-minus sign
> +                       { "sup2", "178" }, // superscript two = superscript digit two = squared
> +                       { "sup3", "179" }, // superscript three = superscript digit three = cubed
> +                       { "acute", "180" }, // acute accent = spacing acute
> +                       { "micro", "181" }, // micro sign
> +                       { "para", "182" }, // pilcrow sign = paragraph sign
> +                       { "middot", "183" }, // middle dot = Georgian comma = Greek middle dot
> +                       { "cedil", "184" }, // cedilla = spacing cedilla
> +                       { "sup1", "185" }, // superscript one = superscript digit one
> +                       { "ordm", "186" }, // masculine ordinal indicator
> +                       { "raquo", "187" }, // right-pointing double angle quotation mark = right pointing
> +// guillemet
> +                       { "frac14", "188" }, // vulgar fraction one quarter = fraction one quarter
> +                       { "frac12", "189" }, // vulgar fraction one half = fraction one half
> +                       { "frac34", "190" }, // vulgar fraction three quarters = fraction three quarters
> +                       { "iquest", "191" }, // inverted question mark = turned question mark
> +                       { "Agrave", "192" }, // À - uppercase A, grave accent
> +                       { "Aacute", "193" }, // Á - uppercase A, acute accent
> +                       { "Acirc", "194" }, // Â - uppercase A, circumflex accent
> +                       { "Atilde", "195" }, // Ã - uppercase A, tilde
> +                       { "Auml", "196" }, // Ä - uppercase A, umlaut
> +                       { "Aring", "197" }, // Å - uppercase A, ring
> +                       { "AElig", "198" }, // Æ - uppercase AE
> +                       { "Ccedil", "199" }, // Ç - uppercase C, cedilla
> +                       { "Egrave", "200" }, // È - uppercase E, grave accent
> +                       { "Eacute", "201" }, // É - uppercase E, acute accent
> +                       { "Ecirc", "202" }, // Ê - uppercase E, circumflex accent
> +                       { "Euml", "203" }, // Ë - uppercase E, umlaut
> +                       { "Igrave", "204" }, // Ì - uppercase I, grave accent
> +                       { "Iacute", "205" }, // Í - uppercase I, acute accent
> +                       { "Icirc", "206" }, // Î - uppercase I, circumflex accent
> +                       { "Iuml", "207" }, // Ï - uppercase I, umlaut
> +                       { "ETH", "208" }, // Ð - uppercase Eth, Icelandic
> +                       { "Ntilde", "209" }, // Ñ - uppercase N, tilde
> +                       { "Ograve", "210" }, // Ò - uppercase O, grave accent
> +                       { "Oacute", "211" }, // Ó - uppercase O, acute accent
> +                       { "Ocirc", "212" }, // Ô - uppercase O, circumflex accent
> +                       { "Otilde", "213" }, // Õ - uppercase O, tilde
> +                       { "Ouml", "214" }, // Ö - uppercase O, umlaut
> +                       { "times", "215" }, // multiplication sign
> +                       { "Oslash", "216" }, // Ø - uppercase O, slash
> +                       { "Ugrave", "217" }, // Ù - uppercase U, grave accent
> +                       { "Uacute", "218" }, // Ú - uppercase U, acute accent
> +                       { "Ucirc", "219" }, // Û - uppercase U, circumflex accent
> +                       { "Uuml", "220" }, // Ü - uppercase U, umlaut
> +                       { "Yacute", "221" }, // Ý - uppercase Y, acute accent
> +                       { "THORN", "222" }, // Þ - uppercase THORN, Icelandic
> +                       { "szlig", "223" }, // ß - lowercase sharps, German
> +                       { "agrave", "224" }, // à - lowercase a, grave accent
> +                       { "aacute", "225" }, // á - lowercase a, acute accent
> +                       { "acirc", "226" }, // â - lowercase a, circumflex accent
> +                       { "atilde", "227" }, // ã - lowercase a, tilde
> +                       { "auml", "228" }, // ä - lowercase a, umlaut
> +                       { "aring", "229" }, // å - lowercase a, ring
> +                       { "aelig", "230" }, // æ - lowercase ae
> +                       { "ccedil", "231" }, // ç - lowercase c, cedilla
> +                       { "egrave", "232" }, // è - lowercase e, grave accent
> +                       { "eacute", "233" }, // é - lowercase e, acute accent
> +                       { "ecirc", "234" }, // ê - lowercase e, circumflex accent
> +                       { "euml", "235" }, // ë - lowercase e, umlaut
> +                       { "igrave", "236" }, // ì - lowercase i, grave accent
> +                       { "iacute", "237" }, // í - lowercase i, acute accent
> +                       { "icirc", "238" }, // î - lowercase i, circumflex accent
> +                       { "iuml", "239" }, // ï - lowercase i, umlaut
> +                       { "eth", "240" }, // ð - lowercase eth, Icelandic
> +                       { "ntilde", "241" }, // ñ - lowercase n, tilde
> +                       { "ograve", "242" }, // ò - lowercase o, grave accent
> +                       { "oacute", "243" }, // ó - lowercase o, acute accent
> +                       { "ocirc", "244" }, // ô - lowercase o, circumflex accent
> +                       { "otilde", "245" }, // õ - lowercase o, tilde
> +                       { "ouml", "246" }, // ö - lowercase o, umlaut
> +                       { "divide", "247" }, // division sign
> +                       { "oslash", "248" }, // ø - lowercase o, slash
> +                       { "ugrave", "249" }, // ù - lowercase u, grave accent
> +                       { "uacute", "250" }, // ú - lowercase u, acute accent
> +                       { "ucirc", "251" }, // û - lowercase u, circumflex accent
> +                       { "uuml", "252" }, // ü - lowercase u, umlaut
> +                       { "yacute", "253" }, // ý - lowercase y, acute accent
> +                       { "thorn", "254" }, // þ - lowercase thorn, Icelandic
> +                       { "yuml", "255" }, // ÿ - lowercase y, umlaut
> +       };
> +
> +       // http://www.w3.org/TR/REC-html40/sgml/entities.html
> +       // package scoped for testing
> +       static final String[][] HTML40_ARRAY = {
> +                       // <!-- Latin Extended-B -->
> +                       { "fnof", "402" }, // latin small f with hook = function= florin, U+0192 ISOtech -->
> +                       // <!-- Greek -->
> +                       { "Alpha", "913" }, // greek capital letter alpha, U+0391 -->
> +                       { "Beta", "914" }, // greek capital letter beta, U+0392 -->
> +                       { "Gamma", "915" }, // greek capital letter gamma,U+0393 ISOgrk3 -->
> +                       { "Delta", "916" }, // greek capital letter delta,U+0394 ISOgrk3 -->
> +                       { "Epsilon", "917" }, // greek capital letter epsilon, U+0395 -->
> +                       { "Zeta", "918" }, // greek capital letter zeta, U+0396 -->
> +                       { "Eta", "919" }, // greek capital letter eta, U+0397 -->
> +                       { "Theta", "920" }, // greek capital letter theta,U+0398 ISOgrk3 -->
> +                       { "Iota", "921" }, // greek capital letter iota, U+0399 -->
> +                       { "Kappa", "922" }, // greek capital letter kappa, U+039A -->
> +                       { "Lambda", "923" }, // greek capital letter lambda,U+039B ISOgrk3 -->
> +                       { "Mu", "924" }, // greek capital letter mu, U+039C -->
> +                       { "Nu", "925" }, // greek capital letter nu, U+039D -->
> +                       { "Xi", "926" }, // greek capital letter xi, U+039E ISOgrk3 -->
> +                       { "Omicron", "927" }, // greek capital letter omicron, U+039F -->
> +                       { "Pi", "928" }, // greek capital letter pi, U+03A0 ISOgrk3 -->
> +                       { "Rho", "929" }, // greek capital letter rho, U+03A1 -->
> +                       // <!-- there is no Sigmaf, and no U+03A2 character either -->
> +                       { "Sigma", "931" }, // greek capital letter sigma,U+03A3 ISOgrk3 -->
> +                       { "Tau", "932" }, // greek capital letter tau, U+03A4 -->
> +                       { "Upsilon", "933" }, // greek capital letter upsilon,U+03A5 ISOgrk3 -->
> +                       { "Phi", "934" }, // greek capital letter phi,U+03A6 ISOgrk3 -->
> +                       { "Chi", "935" }, // greek capital letter chi, U+03A7 -->
> +                       { "Psi", "936" }, // greek capital letter psi,U+03A8 ISOgrk3 -->
> +                       { "Omega", "937" }, // greek capital letter omega,U+03A9 ISOgrk3 -->
> +                       { "alpha", "945" }, // greek small letter alpha,U+03B1 ISOgrk3 -->
> +                       { "beta", "946" }, // greek small letter beta, U+03B2 ISOgrk3 -->
> +                       { "gamma", "947" }, // greek small letter gamma,U+03B3 ISOgrk3 -->
> +                       { "delta", "948" }, // greek small letter delta,U+03B4 ISOgrk3 -->
> +                       { "epsilon", "949" }, // greek small letter epsilon,U+03B5 ISOgrk3 -->
> +                       { "zeta", "950" }, // greek small letter zeta, U+03B6 ISOgrk3 -->
> +                       { "eta", "951" }, // greek small letter eta, U+03B7 ISOgrk3 -->
> +                       { "theta", "952" }, // greek small letter theta,U+03B8 ISOgrk3 -->
> +                       { "iota", "953" }, // greek small letter iota, U+03B9 ISOgrk3 -->
> +                       { "kappa", "954" }, // greek small letter kappa,U+03BA ISOgrk3 -->
> +                       { "lambda", "955" }, // greek small letter lambda,U+03BB ISOgrk3 -->
> +                       { "mu", "956" }, // greek small letter mu, U+03BC ISOgrk3 -->
> +                       { "nu", "957" }, // greek small letter nu, U+03BD ISOgrk3 -->
> +                       { "xi", "958" }, // greek small letter xi, U+03BE ISOgrk3 -->
> +                       { "omicron", "959" }, // greek small letter omicron, U+03BF NEW -->
> +                       { "pi", "960" }, // greek small letter pi, U+03C0 ISOgrk3 -->
> +                       { "rho", "961" }, // greek small letter rho, U+03C1 ISOgrk3 -->
> +                       { "sigmaf", "962" }, // greek small letter final sigma,U+03C2 ISOgrk3 -->
> +                       { "sigma", "963" }, // greek small letter sigma,U+03C3 ISOgrk3 -->
> +                       { "tau", "964" }, // greek small letter tau, U+03C4 ISOgrk3 -->
> +                       { "upsilon", "965" }, // greek small letter upsilon,U+03C5 ISOgrk3 -->
> +                       { "phi", "966" }, // greek small letter phi, U+03C6 ISOgrk3 -->
> +                       { "chi", "967" }, // greek small letter chi, U+03C7 ISOgrk3 -->
> +                       { "psi", "968" }, // greek small letter psi, U+03C8 ISOgrk3 -->
> +                       { "omega", "969" }, // greek small letter omega,U+03C9 ISOgrk3 -->
> +                       { "thetasym", "977" }, // greek small letter theta symbol,U+03D1 NEW -->
> +                       { "upsih", "978" }, // greek upsilon with hook symbol,U+03D2 NEW -->
> +                       { "piv", "982" }, // greek pi symbol, U+03D6 ISOgrk3 -->
> +                       // <!-- General Punctuation -->
> +                       { "bull", "8226" }, // bullet = black small circle,U+2022 ISOpub -->
> +                       // <!-- bullet is NOT the same as bullet operator, U+2219 -->
> +                       { "hellip", "8230" }, // horizontal ellipsis = three dot leader,U+2026 ISOpub -->
> +                       { "prime", "8242" }, // prime = minutes = feet, U+2032 ISOtech -->
> +                       { "Prime", "8243" }, // double prime = seconds = inches,U+2033 ISOtech -->
> +                       { "oline", "8254" }, // overline = spacing overscore,U+203E NEW -->
> +                       { "frasl", "8260" }, // fraction slash, U+2044 NEW -->
> +                       // <!-- Letterlike Symbols -->
> +                       { "weierp", "8472" }, // script capital P = power set= Weierstrass p, U+2118 ISOamso -->
> +                       { "image", "8465" }, // blackletter capital I = imaginary part,U+2111 ISOamso -->
> +                       { "real", "8476" }, // blackletter capital R = real part symbol,U+211C ISOamso -->
> +                       { "trade", "8482" }, // trade mark sign, U+2122 ISOnum -->
> +                       { "alefsym", "8501" }, // alef symbol = first transfinite cardinal,U+2135 NEW -->
> +                       // <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the
> +                       // same glyph could be used to depict both characters -->
> +                       // <!-- Arrows -->
> +                       { "larr", "8592" }, // leftwards arrow, U+2190 ISOnum -->
> +                       { "uarr", "8593" }, // upwards arrow, U+2191 ISOnum-->
> +                       { "rarr", "8594" }, // rightwards arrow, U+2192 ISOnum -->
> +                       { "darr", "8595" }, // downwards arrow, U+2193 ISOnum -->
> +                       { "harr", "8596" }, // left right arrow, U+2194 ISOamsa -->
> +                       { "crarr", "8629" }, // downwards arrow with corner leftwards= carriage return, U+21B5
> +// NEW -->
> +                       { "lArr", "8656" }, // leftwards double arrow, U+21D0 ISOtech -->
> +                       // <!-- ISO 10646 does not say that lArr is the same as the 'is implied by'
> +                       // arrow but also does not have any other character for that function.
> +                       // So ? lArr can be used for 'is implied by' as ISOtech suggests -->
> +                       { "uArr", "8657" }, // upwards double arrow, U+21D1 ISOamsa -->
> +                       { "rArr", "8658" }, // rightwards double arrow,U+21D2 ISOtech -->
> +                       // <!-- ISO 10646 does not say this is the 'implies' character but does not
> +                       // have another character with this function so ?rArr can be used for
> +                       // 'implies' as ISOtech suggests -->
> +                       { "dArr", "8659" }, // downwards double arrow, U+21D3 ISOamsa -->
> +                       { "hArr", "8660" }, // left right double arrow,U+21D4 ISOamsa -->
> +                       // <!-- Mathematical Operators -->
> +                       { "forall", "8704" }, // for all, U+2200 ISOtech -->
> +                       { "part", "8706" }, // partial differential, U+2202 ISOtech -->
> +                       { "exist", "8707" }, // there exists, U+2203 ISOtech -->
> +                       { "empty", "8709" }, // empty set = null set = diameter,U+2205 ISOamso -->
> +                       { "nabla", "8711" }, // nabla = backward difference,U+2207 ISOtech -->
> +                       { "isin", "8712" }, // element of, U+2208 ISOtech -->
> +                       { "notin", "8713" }, // not an element of, U+2209 ISOtech -->
> +                       { "ni", "8715" }, // contains as member, U+220B ISOtech -->
> +                       // <!-- should there be a more memorable name than 'ni'? -->
> +                       { "prod", "8719" }, // n-ary product = product sign,U+220F ISOamsb -->
> +                       // <!-- prod is NOT the same character as U+03A0 'greek capital letter pi'
> +                       // though the same glyph might be used for both -->
> +                       { "sum", "8721" }, // n-ary summation, U+2211 ISOamsb -->
> +                       // <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
> +                       // though the same glyph might be used for both -->
> +                       { "minus", "8722" }, // minus sign, U+2212 ISOtech -->
> +                       { "lowast", "8727" }, // asterisk operator, U+2217 ISOtech -->
> +                       { "radic", "8730" }, // square root = radical sign,U+221A ISOtech -->
> +                       { "prop", "8733" }, // proportional to, U+221D ISOtech -->
> +                       { "infin", "8734" }, // infinity, U+221E ISOtech -->
> +                       { "ang", "8736" }, // angle, U+2220 ISOamso -->
> +                       { "and", "8743" }, // logical and = wedge, U+2227 ISOtech -->
> +                       { "or", "8744" }, // logical or = vee, U+2228 ISOtech -->
> +                       { "cap", "8745" }, // intersection = cap, U+2229 ISOtech -->
> +                       { "cup", "8746" }, // union = cup, U+222A ISOtech -->
> +                       { "int", "8747" }, // integral, U+222B ISOtech -->
> +                       { "there4", "8756" }, // therefore, U+2234 ISOtech -->
> +                       { "sim", "8764" }, // tilde operator = varies with = similar to,U+223C ISOtech -->
> +                       // <!-- tilde operator is NOT the same character as the tilde, U+007E,although
> +                       // the same glyph might be used to represent both -->
> +                       { "cong", "8773" }, // approximately equal to, U+2245 ISOtech -->
> +                       { "asymp", "8776" }, // almost equal to = asymptotic to,U+2248 ISOamsr -->
> +                       { "ne", "8800" }, // not equal to, U+2260 ISOtech -->
> +                       { "equiv", "8801" }, // identical to, U+2261 ISOtech -->
> +                       { "le", "8804" }, // less-than or equal to, U+2264 ISOtech -->
> +                       { "ge", "8805" }, // greater-than or equal to,U+2265 ISOtech -->
> +                       { "sub", "8834" }, // subset of, U+2282 ISOtech -->
> +                       { "sup", "8835" }, // superset of, U+2283 ISOtech -->
> +                       // <!-- note that nsup, 'not a superset of, U+2283' is not covered by the
> +                       // Symbol font encoding and is not included. Should it be, for symmetry?
> +                       // It is in ISOamsn --> <!ENTITY nsub", "8836"},
> +                       // not a subset of, U+2284 ISOamsn -->
> +                       { "sube", "8838" }, // subset of or equal to, U+2286 ISOtech -->
> +                       { "supe", "8839" }, // superset of or equal to,U+2287 ISOtech -->
> +                       { "oplus", "8853" }, // circled plus = direct sum,U+2295 ISOamsb -->
> +                       { "otimes", "8855" }, // circled times = vector product,U+2297 ISOamsb -->
> +                       { "perp", "8869" }, // up tack = orthogonal to = perpendicular,U+22A5 ISOtech -->
> +                       { "sdot", "8901" }, // dot operator, U+22C5 ISOamsb -->
> +                       // <!-- dot operator is NOT the same character as U+00B7 middle dot -->
> +                       // <!-- Miscellaneous Technical -->
> +                       { "lceil", "8968" }, // left ceiling = apl upstile,U+2308 ISOamsc -->
> +                       { "rceil", "8969" }, // right ceiling, U+2309 ISOamsc -->
> +                       { "lfloor", "8970" }, // left floor = apl downstile,U+230A ISOamsc -->
> +                       { "rfloor", "8971" }, // right floor, U+230B ISOamsc -->
> +                       { "lang", "9001" }, // left-pointing angle bracket = bra,U+2329 ISOtech -->
> +                       // <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single
> +// left-pointing angle quotation
> +                       // mark' -->
> +                       { "rang", "9002" }, // right-pointing angle bracket = ket,U+232A ISOtech -->
> +                       // <!-- rang is NOT the same character as U+003E 'greater than' or U+203A
> +                       // 'single right-pointing angle quotation mark' -->
> +                       // <!-- Geometric Shapes -->
> +                       { "loz", "9674" }, // lozenge, U+25CA ISOpub -->
> +                       // <!-- Miscellaneous Symbols -->
> +                       { "spades", "9824" }, // black spade suit, U+2660 ISOpub -->
> +                       // <!-- black here seems to mean filled as opposed to hollow -->
> +                       { "clubs", "9827" }, // black club suit = shamrock,U+2663 ISOpub -->
> +                       { "hearts", "9829" }, // black heart suit = valentine,U+2665 ISOpub -->
> +                       { "diams", "9830" }, // black diamond suit, U+2666 ISOpub -->
> +
> +                       // <!-- Latin Extended-A -->
> +                       { "OElig", "338" }, // -- latin capital ligature OE,U+0152 ISOlat2 -->
> +                       { "oelig", "339" }, // -- latin small ligature oe, U+0153 ISOlat2 -->
> +                       // <!-- ligature is a misnomer, this is a separate character in some languages -->
> +                       { "Scaron", "352" }, // -- latin capital letter S with caron,U+0160 ISOlat2 -->
> +                       { "scaron", "353" }, // -- latin small letter s with caron,U+0161 ISOlat2 -->
> +                       { "Yuml", "376" }, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 -->
> +                       // <!-- Spacing Modifier Letters -->
> +                       { "circ", "710" }, // -- modifier letter circumflex accent,U+02C6 ISOpub -->
> +                       { "tilde", "732" }, // small tilde, U+02DC ISOdia -->
> +                       // <!-- General Punctuation -->
> +                       { "ensp", "8194" }, // en space, U+2002 ISOpub -->
> +                       { "emsp", "8195" }, // em space, U+2003 ISOpub -->
> +                       { "thinsp", "8201" }, // thin space, U+2009 ISOpub -->
> +                       { "zwnj", "8204" }, // zero width non-joiner,U+200C NEW RFC 2070 -->
> +                       { "zwj", "8205" }, // zero width joiner, U+200D NEW RFC 2070 -->
> +                       { "lrm", "8206" }, // left-to-right mark, U+200E NEW RFC 2070 -->
> +                       { "rlm", "8207" }, // right-to-left mark, U+200F NEW RFC 2070 -->
> +                       { "ndash", "8211" }, // en dash, U+2013 ISOpub -->
> +                       { "mdash", "8212" }, // em dash, U+2014 ISOpub -->
> +                       { "lsquo", "8216" }, // left single quotation mark,U+2018 ISOnum -->
> +                       { "rsquo", "8217" }, // right single quotation mark,U+2019 ISOnum -->
> +                       { "sbquo", "8218" }, // single low-9 quotation mark, U+201A NEW -->
> +                       { "ldquo", "8220" }, // left double quotation mark,U+201C ISOnum -->
> +                       { "rdquo", "8221" }, // right double quotation mark,U+201D ISOnum -->
> +                       { "bdquo", "8222" }, // double low-9 quotation mark, U+201E NEW -->
> +                       { "dagger", "8224" }, // dagger, U+2020 ISOpub -->
> +                       { "Dagger", "8225" }, // double dagger, U+2021 ISOpub -->
> +                       { "permil", "8240" }, // per mille sign, U+2030 ISOtech -->
> +                       { "lsaquo", "8249" }, // single left-pointing angle quotation mark,U+2039 ISO proposed
> +// -->
> +                       // <!-- lsaquo is proposed but not yet ISO standardized -->
> +                       { "rsaquo", "8250" }, // single right-pointing angle quotation mark,U+203A ISO proposed
> +// -->
> +                       // <!-- rsaquo is proposed but not yet ISO standardized -->
> +                       { "euro", "8364" }, // -- euro sign, U+20AC NEW -->
> +       };
> +
> +       /**
> +        * <p>
> +        * The set of entities supported by standard XML.
> +        * </p>
> +        */
> +       public static final Entities XML;
> +
> +       /**
> +        * <p>
> +        * The set of entities supported by HTML 3.2.
> +        * </p>
> +        */
> +       public static final Entities HTML32;
> +
> +       /**
> +        * <p>
> +        * The set of entities supported by HTML 4.0.
> +        * </p>
> +        */
> +       public static final Entities HTML40;
> +
> +       static
> +       {
> +               Entities xml = new Entities();
> +               xml.addEntities(BASIC_ARRAY);
> +               xml.addEntities(APOS_ARRAY);
> +               XML = xml;
> +       }
> +
> +       static
> +       {
> +               Entities html32 = new Entities();
> +               html32.addEntities(BASIC_ARRAY);
> +               html32.addEntities(ISO8859_1_ARRAY);
> +               HTML32 = html32;
> +       }
> +
> +       static
> +       {
> +               Entities html40 = new Entities();
> +               fillWithHtml40Entities(html40);
> +               HTML40 = html40;
> +       }
> +
> +       /**
> +        * <p>
> +        * Fills the specified entities instance with HTML 40 entities.
> +        * </p>
> +        *
> +        * @param entities
> +        *            the instance to be filled.
> +        */
> +       static void fillWithHtml40Entities(Entities entities)
> +       {
> +               entities.addEntities(BASIC_ARRAY);
> +               entities.addEntities(ISO8859_1_ARRAY);
> +               entities.addEntities(HTML40_ARRAY);
> +       }
> +
> +       /**
> +        * Contract for the two-way store (entity name to numeric value and back) that an
> +        * <code>Entities</code> instance delegates its lookups to.
> +        */
> +       static interface EntityMap
> +       {
> +               /**
> +                * <p>
> +                * Add an entry to this entity map.
> +                * </p>
> +                *
> +                * @param name
> +                *            the entity name
> +                * @param value
> +                *            the entity value
> +                */
> +               void add(String name, int value);
> +
> +               /**
> +                * <p>
> +                * Returns the name of the entity identified by the specified value.
> +                * </p>
> +                *
> +                * @param value
> +                *            the value to locate
> +                * @return entity name associated with the specified value; the array based
> +                *         implementations in this file return <code>null</code> when the value is
> +                *         unknown
> +                */
> +               String name(int value);
> +
> +               /**
> +                * <p>
> +                * Returns the value of the entity identified by the specified name.
> +                * </p>
> +                *
> +                * @param name
> +                *            the name to locate
> +                * @return entity value associated with the specified name; the implementations in this
> +                *         file return <code>-1</code> when the name is unknown
> +                */
> +               int value(String name);
> +       }
> +
> +       /**
> +        * Entity map that keeps name-to-value entries in a {@link HashMap} and value-to-name entries
> +        * in an <code>IntHashMap</code>.
> +        */
> +       static class PrimitiveEntityMap implements EntityMap
> +       {
> +               private final Map mapNameToValue = new HashMap();
> +
> +               private final IntHashMap mapValueToName = new IntHashMap();
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               // TODO not thread-safe as there is a window between changing the two maps
> +               public void add(String name, int value)
> +               {
> +                       // Integer.valueOf may hand back a cached instance; the boxing constructor is
> +                       // deprecated and always allocates
> +                       mapNameToValue.put(name, Integer.valueOf(value));
> +                       mapValueToName.put(value, name);
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public String name(int value)
> +               {
> +                       return (String)mapValueToName.get(value);
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                *
> +                * @return the stored value, or <code>-1</code> when the name has not been added
> +                */
> +               public int value(String name)
> +               {
> +                       Object value = mapNameToValue.get(name);
> +                       if (value == null)
> +                       {
> +                               return -1;
> +                       }
> +                       return ((Integer)value).intValue();
> +               }
> +       }
> +
> +       /**
> +        * Base class for entity maps that keep one {@link Map} per lookup direction. Values are
> +        * stored boxed as {@link Integer}.
> +        */
> +       static abstract class MapIntMap implements Entities.EntityMap
> +       {
> +               protected final Map mapNameToValue;
> +
> +               protected final Map mapValueToName;
> +
> +               /**
> +                * Construct a new instance with specified maps.
> +                *
> +                * @param nameToValue
> +                *            name to value map
> +                * @param valueToName
> +                *            value to name map
> +                */
> +               MapIntMap(Map nameToValue, Map valueToName)
> +               {
> +                       mapNameToValue = nameToValue;
> +                       mapValueToName = valueToName;
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public void add(String name, int value)
> +               {
> +                       // box once via Integer.valueOf (the boxing constructor is deprecated) and share
> +                       // the same object between both maps
> +                       Integer boxed = Integer.valueOf(value);
> +                       mapNameToValue.put(name, boxed);
> +                       mapValueToName.put(boxed, name);
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public String name(int value)
> +               {
> +                       return (String)mapValueToName.get(Integer.valueOf(value));
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                *
> +                * @return the stored value, or <code>-1</code> when the name has not been added
> +                */
> +               public int value(String name)
> +               {
> +                       Object value = mapNameToValue.get(name);
> +                       if (value == null)
> +                       {
> +                               return -1;
> +                       }
> +                       return ((Integer)value).intValue();
> +               }
> +       }
> +
> +       /**
> +        * {@link MapIntMap} whose two lookup maps are both {@link HashMap} instances.
> +        */
> +       static class HashEntityMap extends MapIntMap
> +       {
> +               /**
> +                * Constructs a new instance of <code>HashEntityMap</code>.
> +                */
> +               public HashEntityMap()
> +               {
> +                       super(new HashMap(), new HashMap());
> +               }
> +       }
> +
> +       /**
> +        * {@link MapIntMap} whose two lookup maps are both {@link TreeMap} instances.
> +        */
> +       static class TreeEntityMap extends MapIntMap
> +       {
> +               /**
> +                * Constructs a new instance of <code>TreeEntityMap</code>.
> +                */
> +               public TreeEntityMap()
> +               {
> +                       super(new TreeMap(), new TreeMap());
> +               }
> +       }
> +
> +       static class LookupEntityMap extends PrimitiveEntityMap
> +       {
> +               // TODO this class is not thread-safe
> +               private String[] lookupTable;
> +
> +               private static final int LOOKUP_TABLE_SIZE = 256;
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public String name(int value)
> +               {
> +                       if (value < LOOKUP_TABLE_SIZE)
> +                       {
> +                               return lookupTable()[value];
> +                       }
> +                       return super.name(value);
> +               }
> +
> +               /**
> +                * <p>
> +                * Returns the lookup table for this entity map. The lookup table is created if it has not
> +                * been previously.
> +                * </p>
> +                *
> +                * @return the lookup table
> +                */
> +               private String[] lookupTable()
> +               {
> +                       if (lookupTable == null)
> +                       {
> +                               createLookupTable();
> +                       }
> +                       return lookupTable;
> +               }
> +
> +               /**
> +                * <p>
> +                * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements, initialized with entity
> +                * names.
> +                * </p>
> +                */
> +               private void createLookupTable()
> +               {
> +                       lookupTable = new String[LOOKUP_TABLE_SIZE];
> +                       for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i)
> +                       {
> +                               lookupTable[i] = super.name(i);
> +                       }
> +               }
> +       }
> +
> +       static class ArrayEntityMap implements EntityMap
> +       {
> +               // TODO this class is not thread-safe
> +               protected final int growBy;
> +
> +               protected int size = 0;
> +
> +               protected String[] names;
> +
> +               protected int[] values;
> +
> +               /**
> +                * Constructs a new instance of <code>ArrayEntityMap</code>.
> +                */
> +               public ArrayEntityMap()
> +               {
> +                       growBy = 100;
> +                       names = new String[growBy];
> +                       values = new int[growBy];
> +               }
> +
> +               /**
> +                * Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the
> +                * array should grow.
> +                *
> +                * @param growBy
> +                *            array will be initialized to and will grow by this amount
> +                */
> +               public ArrayEntityMap(int growBy)
> +               {
> +                       this.growBy = growBy;
> +                       names = new String[growBy];
> +                       values = new int[growBy];
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public void add(String name, int value)
> +               {
> +                       ensureCapacity(size + 1);
> +                       names[size] = name;
> +                       values[size] = value;
> +                       size++;
> +               }
> +
> +               /**
> +                * Verifies the capacity of the entity array, adjusting the size if necessary.
> +                *
> +                * @param capacity
> +                *            size the array should be
> +                */
> +               protected void ensureCapacity(int capacity)
> +               {
> +                       if (capacity > names.length)
> +                       {
> +                               int newSize = Math.max(capacity, size + growBy);
> +                               String[] newNames = new String[newSize];
> +                               System.arraycopy(names, 0, newNames, 0, size);
> +                               names = newNames;
> +                               int[] newValues = new int[newSize];
> +                               System.arraycopy(values, 0, newValues, 0, size);
> +                               values = newValues;
> +                       }
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public String name(int value)
> +               {
> +                       for (int i = 0; i < size; ++i)
> +                       {
> +                               if (values[i] == value)
> +                               {
> +                                       return names[i];
> +                               }
> +                       }
> +                       return null;
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public int value(String name)
> +               {
> +                       for (int i = 0; i < size; ++i)
> +                       {
> +                               if (names[i].equals(name))
> +                               {
> +                                       return values[i];
> +                               }
> +                       }
> +                       return -1;
> +               }
> +       }
> +
> +       static class BinaryEntityMap extends ArrayEntityMap
> +       {
> +
> +               // TODO - not thread-safe, because parent is not. Also references size.
> +
> +               /**
> +                * Constructs a new instance of <code>BinaryEntityMap</code>.
> +                */
> +               public BinaryEntityMap()
> +               {
> +                       super();
> +               }
> +
> +               /**
> +                * Constructs a new instance of <code>ArrayEntityMap</code> specifying the size by which the
> +                * underlying array should grow.
> +                *
> +                * @param growBy
> +                *            array will be initialized to and will grow by this amount
> +                */
> +               public BinaryEntityMap(int growBy)
> +               {
> +                       super(growBy);
> +               }
> +
> +               /**
> +                * Performs a binary search of the entity array for the specified key. This method is based
> +                * on code in {@link java.util.Arrays}.
> +                *
> +                * @param key
> +                *            the key to be found
> +                * @return the index of the entity array matching the specified key
> +                */
> +               private int binarySearch(int key)
> +               {
> +                       int low = 0;
> +                       int high = size - 1;
> +
> +                       while (low <= high)
> +                       {
> +                               int mid = (low + high) >>> 1;
> +                               int midVal = values[mid];
> +
> +                               if (midVal < key)
> +                               {
> +                                       low = mid + 1;
> +                               }
> +                               else if (midVal > key)
> +                               {
> +                                       high = mid - 1;
> +                               }
> +                               else
> +                               {
> +                                       return mid; // key found
> +                               }
> +                       }
> +                       return -(low + 1); // key not found.
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public void add(String name, int value)
> +               {
> +                       ensureCapacity(size + 1);
> +                       int insertAt = binarySearch(value);
> +                       if (insertAt > 0)
> +                       {
> +                               return; // note: this means you can't insert the same value twice
> +                       }
> +                       insertAt = -(insertAt + 1); // binarySearch returns it negative and off-by-one
> +                       System.arraycopy(values, insertAt, values, insertAt + 1, size - insertAt);
> +                       values[insertAt] = value;
> +                       System.arraycopy(names, insertAt, names, insertAt + 1, size - insertAt);
> +                       names[insertAt] = name;
> +                       size++;
> +               }
> +
> +               /**
> +                * {@inheritDoc}
> +                */
> +               public String name(int value)
> +               {
> +                       int index = binarySearch(value);
> +                       if (index < 0)
> +                       {
> +                               return null;
> +                       }
> +                       return names[index];
> +               }
> +       }
> +
> +       private final EntityMap map;
> +
> +       /**
> +        * Default constructor; the new instance is backed by a {@link LookupEntityMap}.
> +        */
> +       public Entities()
> +       {
> +               this(new LookupEntityMap());
> +       }
> +
> +       /**
> +        * Package-scoped constructor that lets tests choose the backing map.
> +        *
> +        * @param emap
> +        *            entity map.
> +        */
> +       Entities(EntityMap emap)
> +       {
> +               this.map = emap;
> +       }
> +
> +       /**
> +        * <p>
> +        * Adds entities to this entity.
> +        * </p>
> +        *
> +        * @param entityArray
> +        *            array of entities to be added
> +        */
> +       public void addEntities(String[][] entityArray)
> +       {
> +               for (int i = 0; i < entityArray.length; ++i)
> +               {
> +                       addEntity(entityArray[i][0], Integer.parseInt(entityArray[i][1]));
> +               }
> +       }
> +
> +       /**
> +        * <p>
> +        * Adds a single entity by forwarding it to the underlying entity map.
> +        * </p>
> +        *
> +        * @param name
> +        *            name of the entity
> +        * @param value
> +        *            value of the entity
> +        */
> +       public void addEntity(String name, int value)
> +       {
> +               map.add(name, value);
> +       }
> +
> +       /**
> +        * <p>
> +        * Returns the name of the entity identified by the specified value, as reported by the
> +        * underlying entity map.
> +        * </p>
> +        *
> +        * @param value
> +        *            the value to locate
> +        * @return entity name associated with the specified value
> +        */
> +       public String entityName(int value)
> +       {
> +               return map.name(value);
> +       }
> +
> +       /**
> +        * <p>
> +        * Returns the value of the entity identified by the specified name, as reported by the
> +        * underlying entity map.
> +        * </p>
> +        *
> +        * @param name
> +        *            the name to locate
> +        * @return entity value associated with the specified name; the entity map implementations
> +        *         in this file report <code>-1</code> for an unknown name
> +        */
> +       public int entityValue(String name)
> +       {
> +               return map.value(name);
> +       }
> +
> +       /**
> +        * <p>
> +        * Escapes the characters in a <code>String</code> and returns the result as a new string.
> +        * </p>
> +        *
> +        * <p>
> +        * For example, if you have called addEntity(&quot;foo&quot;, 0xA1), escape(&quot;\u00A1&quot;)
> +        * will return &quot;&amp;foo;&quot;
> +        * </p>
> +        *
> +        * @param str
> +        *            The <code>String</code> to escape.
> +        * @return A new escaped <code>String</code>.
> +        */
> +       public String escape(String str)
> +       {
> +               final StringWriter buffer = createStringWriter(str);
> +               try
> +               {
> +                       escape(buffer, str);
> +                       return buffer.toString();
> +               }
> +               catch (IOException e)
> +               {
> +                       // Not expected: none of the StringWriter methods called by
> +                       // #escape(Writer, String) throws an IOException.
> +                       throw new RuntimeException(e);
> +               }
> +       }
> +
> +       /**
> +        * <p>
> +        * Escapes the characters in the <code>String</code> passed and writes the result to the
> +        * <code>Writer</code> passed.
> +        * </p>
> +        *
> +        * <p>
> +        * A character with a registered entity name is written as <code>&amp;name;</code>. Any other
> +        * character above 0x7F is written as a decimal numeric reference, and everything else is
> +        * copied unchanged. The input is processed by Unicode code point, so a character outside the
> +        * Basic Multilingual Plane yields one reference for the whole character rather than one per
> +        * UTF-16 surrogate.
> +        * </p>
> +        *
> +        * @param writer
> +        *            The <code>Writer</code> to write the results of the escaping to. Assumed to be a
> +        *            non-null value.
> +        * @param str
> +        *            The <code>String</code> to escape. Assumed to be a non-null value.
> +        * @throws IOException
> +        *             when <code>Writer</code> passed throws the exception from calls to the
> +        *             {@link Writer#write(int)} methods.
> +        *
> +        * @see #escape(String)
> +        * @see Writer
> +        */
> +       public void escape(Writer writer, String str) throws IOException
> +       {
> +               final int len = str.length();
> +               int i = 0;
> +               while (i < len)
> +               {
> +                       // read a whole code point; an unpaired surrogate is returned as itself
> +                       final int codePoint = str.codePointAt(i);
> +                       i += Character.charCount(codePoint);
> +
> +                       final String entityName = entityName(codePoint);
> +                       if (entityName != null)
> +                       {
> +                               writer.write('&');
> +                               writer.write(entityName);
> +                               writer.write(';');
> +                       }
> +                       else if (codePoint > 0x7F)
> +                       {
> +                               writer.write("&#");
> +                               writer.write(Integer.toString(codePoint, 10));
> +                               writer.write(';');
> +                       }
> +                       else
> +                       {
> +                               // plain ASCII without an entity name: copy through
> +                               writer.write(codePoint);
> +                       }
> +               }
> +       }
> +
> +       /**
> +        * <p>
> +        * Unescapes the entities in a <code>String</code>.
> +        * </p>
> +        *
> +        * <p>
> +        * For example, if you have called addEntity(&quot;foo&quot;, 0xA1),
> +        * unescape(&quot;&amp;foo;&quot;) will return &quot;\u00A1&quot;
> +        * </p>
> +        *
> +        * @param str
> +        *            The <code>String</code> to unescape.
> +        * @return The unescaped <code>String</code>; the very same instance is returned when the
> +        *         input contains no ampersand.
> +        */
> +       public String unescape(String str)
> +       {
> +               final int firstAmp = str.indexOf('&');
> +               if (firstAmp < 0)
> +               {
> +                       // nothing that could be an entity
> +                       return str;
> +               }
> +
> +               final StringWriter buffer = createStringWriter(str);
> +               try
> +               {
> +                       doUnescape(buffer, str, firstAmp);
> +               }
> +               catch (IOException e)
> +               {
> +                       // Not expected: the target is a StringWriter, whose write methods do not throw
> +                       // IOExceptions.
> +                       throw new RuntimeException(e);
> +               }
> +               return buffer.toString();
> +       }
> +
> +       /**
> +        * Make the StringWriter 10% larger than the source String to avoid growing the writer
> +        *
> +        * @param str
> +        *            The source string
> +        * @return A newly created StringWriter
> +        */
> +       private StringWriter createStringWriter(String str)
> +       {
> +               return new StringWriter((int)(str.length() + (str.length() * 0.1)));
> +       }
> +
> +       /**
> +        * <p>
> +        * Unescapes the escaped entities in the <code>String</code> passed and writes the result to the
> +        * <code>Writer</code> passed. A string without any ampersand is written out unchanged.
> +        * </p>
> +        *
> +        * @param writer
> +        *            The <code>Writer</code> to write the results to; assumed to be non-null.
> +        * @param str
> +        *            The source <code>String</code> to unescape; assumed to be non-null.
> +        * @throws IOException
> +        *             when <code>Writer</code> passed throws the exception from calls to the
> +        *             {@link Writer#write(int)} methods.
> +        *
> +        * @see #unescape(String)
> +        * @see Writer
> +        */
> +       public void unescape(Writer writer, String str) throws IOException
> +       {
> +               final int firstAmp = str.indexOf('&');
> +               if (firstAmp >= 0)
> +               {
> +                       doUnescape(writer, str, firstAmp);
> +               }
> +               else
> +               {
> +                       writer.write(str);
> +               }
> +       }
> +
> +       /**
> +        * Underlying unescape method that allows the optimisation of not starting from the 0 index
> +        * again.
> +        *
> +        * @param writer
> +        *            The <code>Writer</code> to write the results to; assumed to be non-null.
> +        * @param str
> +        *            The source <code>String</code> to unescape; assumed to be non-null.
> +        * @param firstAmp
> +        *            The <code>int</code> index of the first ampersand in the source String.
> +        * @throws IOException
> +        *             when <code>Writer</code> passed throws the exception from calls to the
> +        *             {@link Writer#write(int)} methods.
> +        */
> +       private void doUnescape(Writer writer, String str, int firstAmp) throws IOException
> +       {
> +               writer.write(str, 0, firstAmp);
> +               int len = str.length();
> +               for (int i = firstAmp; i < len; i++)
> +               {
> +                       char c = str.charAt(i);
> +                       if (c == '&')
> +                       {
> +                               int nextIdx = i + 1;
> +                               int semiColonIdx = str.indexOf(';', nextIdx);
> +                               if (semiColonIdx == -1)
> +                               {
> +                                       writer.write(c);
> +                                       continue;
> +                               }
> +                               int amphersandIdx = str.indexOf('&', i + 1);
> +                               if (amphersandIdx != -1 && amphersandIdx < semiColonIdx)
> +                               {
> +                                       // Then the text looks like &...&...;
> +                                       writer.write(c);
> +                                       continue;
> +                               }
> +                               String entityContent = str.substring(nextIdx, semiColonIdx);
> +                               int entityValue = -1;
> +                               int entityContentLen = entityContent.length();
> +                               if (entityContentLen > 0)
> +                               {
> +                                       if (entityContent.charAt(0) == '#')
> +                                       { // escaped value content is an integer (decimal or
> +                                               // hexadecimal)
> +                                               if (entityContentLen > 1)
> +                                               {
> +                                                       char isHexChar = entityContent.charAt(1);
> +                                                       try
> +                                                       {
> +                                                               switch (isHexChar)
> +                                                               {
> +                                                                       case 'X' :
> +                                                                       case 'x' : {
> +                                                                               entityValue = Integer.parseInt(entityContent.substring(2),
> +                                                                                       16);
> +                                                                               break;
> +                                                                       }
> +                                                                       default : {
> +                                                                               entityValue = Integer.parseInt(entityContent.substring(1),
> +                                                                                       10);
> +                                                                       }
> +                                                               }
> +                                                               if (entityValue > 0xFFFF)
> +                                                               {
> +                                                                       entityValue = -1;
> +                                                               }
> +                                                       }
> +                                                       catch (NumberFormatException e)
> +                                                       {
> +                                                               entityValue = -1;
> +                                                       }
> +                                               }
> +                                       }
> +                                       else
> +                                       { // escaped value content is an entity name
> +                                               entityValue = entityValue(entityContent);
> +                                       }
> +                               }
> +
> +                               if (entityValue == -1)
> +                               {
> +                                       writer.write('&');
> +                                       writer.write(entityContent);
> +                                       writer.write(';');
> +                               }
> +                               else
> +                               {
> +                                       writer.write(entityValue);
> +                               }
> +                               i = semiColonIdx; // move index up to the semi-colon
> +                       }
> +                       else
> +                       {
> +                               writer.write(c);
> +                       }
> +               }
> +       }
> +
> +}
>
> Propchange: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Entities.java
> ------------------------------------------------------------------------------
>    svn:executable = *
>
> Added: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java
> URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java?rev=1099592&view=auto
> ==============================================================================
> --- wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java (added)
> +++ wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java Wed May  4 20:24:59 2011
> @@ -0,0 +1,428 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.wicket.util.string;
> +
> +import java.io.IOException;
> +import java.io.StringWriter;
> +import java.io.Writer;
> +import java.util.Locale;
> +
> +/**
> + * <p>
> + * Escapes and unescapes <code>String</code>s for HTML and XML.
> + * </p>
> + *
> + * <p>
> + * #ThreadSafe#
> + * </p>
> + *
> + * @author Apache Software Foundation
> + * @author Apache Jakarta Turbine
> + * @author Purple Technology
> + * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
> + * @author Antony Riley
> + * @author Helge Tesgaard
> + * @author <a href="mailto:sean@boohai.com">Sean Brown</a>
> + * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
> + * @author Phil Steitz
> + * @author Pete Gieser
> + * @since 2.0
> + * @version $Id$
> + */
> +// Copy from commons-lang ver. 2.6. Non-html/xml methods were removed
> +class StringEscapeUtils
> +{
> +
> +       /**
> +        * <p>
> +        * <code>StringEscapeUtils</code> instances should NOT be constructed in standard programming.
> +        * </p>
> +        *
> +        * <p>
> +        * Instead, the class should be used as:
> +        *
> +        * <pre>
> +        * StringEscapeUtils.escapeHtml(&quot;foo&quot;);
> +        * </pre>
> +        *
> +        * </p>
> +        *
> +        * <p>
> +        * This constructor is public to permit tools that require a JavaBean instance to operate.
> +        * </p>
> +        */
> +       public StringEscapeUtils()
> +       {
> +               super();
> +       }
> +
> +       /**
> +        * <p>
> +        * Returns an upper case hexadecimal <code>String</code> for the given character.
> +        * </p>
> +        *
> +        * @param ch
> +        *            The character to convert.
> +        * @return An upper case hexadecimal <code>String</code>
> +        */
> +       private static String hex(char ch)
> +       {
> +               return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
> +       }
> +
> +       // HTML and XML
> +       // --------------------------------------------------------------------------
> +       /**
> +        * <p>
> +        * Escapes the characters in a <code>String</code> using HTML entities.
> +        * </p>
> +        *
> +        * <p>
> +        * For example:
> +        * </p>
> +        * <p>
> +        * <code>"bread" & "butter"</code>
> +        * </p>
> +        * becomes:
> +        * <p>
> +        * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
> +        * </p>
> +        *
> +        * <p>
> +        * Supports all known HTML 4.0 entities, including funky accents. Note that the commonly used
> +        * apostrophe escape character (&amp;apos;) is not a legal entity and so is not supported.
> +        * </p>
> +        *
> +        * @param str
> +        *            the <code>String</code> to escape, may be null
> +        * @return a new escaped <code>String</code>, <code>null</code> if null string input
> +        *
> +        * @see #unescapeHtml(String)
> +        * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO
> +        *      Entities</a>
> +        * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
> +        *      Latin-1</a>
> +        * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
> +        *      references</a>
> +        * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character
> +        *      References</a>
> +        * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
> +        *      positions</a>
> +        */
> +       public static String escapeHtml(String str)
> +       {
> +               if (str == null)
> +               {
> +                       return null;
> +               }
> +               try
> +               {
> +                       StringWriter writer = new StringWriter((int)(str.length() * 1.5));
> +                       escapeHtml(writer, str);
> +                       return writer.toString();
> +               }
> +               catch (IOException ioe)
> +               {
> +                       // should be impossible
> +                       throw new RuntimeException(ioe);
> +               }
> +       }
> +
> +       /**
> +        * <p>
> +        * Escapes the characters in a <code>String</code> using HTML entities and writes them to a
> +        * <code>Writer</code>.
> +        * </p>
> +        *
> +        * <p>
> +        * For example:
> +        * </p>
> +        * <code>"bread" & "butter"</code>
> +        * <p>
> +        * becomes:
> +        * </p>
> +        * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
> +        *
> +        * <p>
> +        * Supports all known HTML 4.0 entities, including funky accents. Note that the commonly used
> +        * apostrophe escape character (&amp;apos;) is not a legal entity and so is not supported.
> +        * </p>
> +        *
> +        * @param writer
> +        *            the writer receiving the escaped string, not null
> +        * @param string
> +        *            the <code>String</code> to escape, may be null
> +        * @throws IllegalArgumentException
> +        *             if the writer is null
> +        * @throws IOException
> +        *             when <code>Writer</code> passed throws the exception from calls to the
> +        *             {@link Writer#write(int)} methods.
> +        *
> +        * @see #escapeHtml(String)
> +        * @see #unescapeHtml(String)
> +        * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO
> +        *      Entities</a>
> +        * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
> +        *      Latin-1</a>
> +        * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
> +        *      references</a>
> +        * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character
> +        *      References</a>
> +        * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
> +        *      positions</a>
> +        */
> +       public static void escapeHtml(Writer writer, String string) throws IOException
> +       {
> +               if (writer == null)
> +               {
> +                       throw new IllegalArgumentException("The Writer must not be null.");
> +               }
> +               if (string == null)
> +               {
> +                       return;
> +               }
> +               Entities.HTML40.escape(writer, string);
> +       }
> +
> +       // -----------------------------------------------------------------------
> +       /**
> +        * <p>
> +        * Unescapes a string containing entity escapes to a string containing the actual Unicode
> +        * characters corresponding to the escapes. Supports HTML 4.0 entities.
> +        * </p>
> +        *
> +        * <p>
> +        * For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" will become
> +        * "&lt;Fran&ccedil;ais&gt;"
> +        * </p>
> +        *
> +        * <p>
> +        * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string.
> +        * e.g. "&amp;gt;&amp;zzzz;x" will become "&gt;&amp;zzzz;x".
> +        * </p>
> +        *
> +        * @param str
> +        *            the <code>String</code> to unescape, may be null
> +        * @return a new unescaped <code>String</code>, <code>null</code> if null string input
> +        * @see #escapeHtml(Writer, String)
> +        */
> +       public static String unescapeHtml(String str)
> +       {
> +               if (str == null)
> +               {
> +                       return null;
> +               }
> +               try
> +               {
> +                       StringWriter writer = new StringWriter((int)(str.length() * 1.5));
> +                       unescapeHtml(writer, str);
> +                       return writer.toString();
> +               }
> +               catch (IOException ioe)
> +               {
> +                       // should be impossible
> +                       throw new RuntimeException(ioe);
> +               }
> +       }
> +
> +       /**
> +        * <p>
> +        * Unescapes a string containing entity escapes to a string containing the actual Unicode
> +        * characters corresponding to the escapes. Supports HTML 4.0 entities.
> +        * </p>
> +        *
> +        * <p>
> +        * For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" will become
> +        * "&lt;Fran&ccedil;ais&gt;"
> +        * </p>
> +        *
> +        * <p>
> +        * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string.
> +        * e.g. "&amp;gt;&amp;zzzz;x" will become "&gt;&amp;zzzz;x".
> +        * </p>
> +        *
> +        * @param writer
> +        *            the writer receiving the unescaped string, not null
> +        * @param string
> +        *            the <code>String</code> to unescape, may be null
> +        * @throws IllegalArgumentException
> +        *             if the writer is null
> +        * @throws IOException
> +        *             if an IOException occurs
> +        * @see #escapeHtml(String)
> +        */
> +       public static void unescapeHtml(Writer writer, String string) throws IOException
> +       {
> +               if (writer == null)
> +               {
> +                       throw new IllegalArgumentException("The Writer must not be null.");
> +               }
> +               if (string == null)
> +               {
> +                       return;
> +               }
> +               Entities.HTML40.unescape(writer, string);
> +       }
> +
> +       // -----------------------------------------------------------------------
> +       /**
> +        * <p>
> +        * Escapes the characters in a <code>String</code> using XML entities.
> +        * </p>
> +        *
> +        * <p>
> +        * For example: <tt>"bread" & "butter"</tt> =>
> +        * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
> +        * </p>
> +        *
> +        * <p>
> +        * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
> +        * external entities.
> +        * </p>
> +        *
> +        * <p>
> +        * Note that unicode characters greater than 0x7f are currently escaped to their numerical \\u
> +        * equivalent. This may change in future releases.
> +        * </p>
> +        *
> +        * @param writer
> +        *            the writer receiving the escaped string, not null
> +        * @param str
> +        *            the <code>String</code> to escape, may be null
> +        * @throws IllegalArgumentException
> +        *             if the writer is null
> +        * @throws IOException
> +        *             if there is a problem writing
> +        * @see #unescapeXml(java.lang.String)
> +        */
> +       public static void escapeXml(Writer writer, String str) throws IOException
> +       {
> +               if (writer == null)
> +               {
> +                       throw new IllegalArgumentException("The Writer must not be null.");
> +               }
> +               if (str == null)
> +               {
> +                       return;
> +               }
> +               Entities.XML.escape(writer, str);
> +       }
> +
> +       /**
> +        * <p>
> +        * Escapes the characters in a <code>String</code> using XML entities.
> +        * </p>
> +        *
> +        * <p>
> +        * For example: <tt>"bread" & "butter"</tt> =>
> +        * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
> +        * </p>
> +        *
> +        * <p>
> +        * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
> +        * external entities.
> +        * </p>
> +        *
> +        * <p>
> +        * Note that unicode characters greater than 0x7f are currently escaped to their numerical \\u
> +        * equivalent. This may change in future releases.
> +        * </p>
> +        *
> +        * @param str
> +        *            the <code>String</code> to escape, may be null
> +        * @return a new escaped <code>String</code>, <code>null</code> if null string input
> +        * @see #unescapeXml(java.lang.String)
> +        */
> +       public static String escapeXml(String str)
> +       {
> +               if (str == null)
> +               {
> +                       return null;
> +               }
> +               return Entities.XML.escape(str);
> +       }
> +
> +       // -----------------------------------------------------------------------
> +       /**
> +        * <p>
> +        * Unescapes a string containing XML entity escapes to a string containing the actual Unicode
> +        * characters corresponding to the escapes.
> +        * </p>
> +        *
> +        * <p>
> +        * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
> +        * external entities.
> +        * </p>
> +        *
> +        * <p>
> +        * Note that numerical \\u unicode codes are unescaped to their respective unicode characters.
> +        * This may change in future releases.
> +        * </p>
> +        *
> +        * @param writer
> +        *            the writer receiving the unescaped string, not null
> +        * @param str
> +        *            the <code>String</code> to unescape, may be null
> +        * @throws IllegalArgumentException
> +        *             if the writer is null
> +        * @throws IOException
> +        *             if there is a problem writing
> +        * @see #escapeXml(String)
> +        */
> +       public static void unescapeXml(Writer writer, String str) throws IOException
> +       {
> +               if (writer == null)
> +               {
> +                       throw new IllegalArgumentException("The Writer must not be null.");
> +               }
> +               if (str == null)
> +               {
> +                       return;
> +               }
> +               Entities.XML.unescape(writer, str);
> +       }
> +
> +       /**
> +        * <p>
> +        * Unescapes a string containing XML entity escapes to a string containing the actual Unicode
> +        * characters corresponding to the escapes.
> +        * </p>
> +        *
> +        * <p>
> +        * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
> +        * external entities.
> +        * </p>
> +        *
> +        * <p>
> +        * Note that numerical \\u unicode codes are unescaped to their respective unicode characters.
> +        * This may change in future releases.
> +        * </p>
> +        *
> +        * @param str
> +        *            the <code>String</code> to unescape, may be null
> +        * @return a new unescaped <code>String</code>, <code>null</code> if null string input
> +        * @see #escapeXml(String)
> +        */
> +       public static String unescapeXml(String str)
> +       {
> +               if (str == null)
> +               {
> +                       return null;
> +               }
> +               return Entities.XML.unescape(str);
> +       }
> +}
>
> Propchange: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/StringEscapeUtils.java
> ------------------------------------------------------------------------------
>    svn:executable = *
>
> Modified: wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java
> URL: http://svn.apache.org/viewvc/wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java?rev=1099592&r1=1099591&r2=1099592&view=diff
> ==============================================================================
> --- wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java (original)
> +++ wicket/trunk/wicket-util/src/main/java/org/apache/wicket/util/string/Strings.java Wed May  4 20:24:59 2011
> @@ -375,6 +375,19 @@ public final class Strings
>        }
>
>        /**
> +        * Unescapes the escaped entities in the <code>markup</code> passed.
> +        *
> +        * @param markup
> +        *            The source <code>String</code> to unescape.
> +        * @return the unescaped markup or <code>null</code> if the input is <code>null</code>
> +        */
> +       public static CharSequence unescapeMarkup(final String markup)
> +       {
> +               String unescapedMarkup = StringEscapeUtils.unescapeHtml(markup);
> +               return unescapedMarkup;
> +       }
> +
> +       /**
>         * Gets the first path component of a path using a given separator. If the separator cannot be
>         * found, the path itself is returned.
>         * <p>
>
>
>



-- 
Martin Grigorov
jWeekend
Training, Consulting, Development
http://jWeekend.com