You are viewing a plain text version of this content. The canonical link for it is here.
Posted to xindice-dev@xml.apache.org by na...@apache.org on 2008/03/10 01:52:17 UTC
svn commit: r635409 - in /xml/xindice/trunk/java/src/org/apache/xindice:
util/XMLUtilities.java xml/sax/SetContentHandler.java
Author: natalia
Date: Sun Mar 9 17:52:12 2008
New Revision: 635409
URL: http://svn.apache.org/viewvc?rev=635409&view=rev
Log:
Fix for XML escaping
Added:
xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java (with props)
Modified:
xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java
Added: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java
URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java?rev=635409&view=auto
==============================================================================
--- xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java (added)
+++ xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java Sun Mar 9 17:52:12 2008
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * $Id$
+ */
+
+package org.apache.xindice.util;
+
+/**
+ * Set of XML-related utilities.
+ *
+ * @version $Revision$, $Date$
+ */
+public class XMLUtilities {
+ private static final String REPLACEMENT = "�";
+
+ /**
+ * Converts input text into its XML representation by escaping all special symbols,
+ * if any are present.
+ *
+ * @param value Input array
+ * @param offset Start position in the array
+ * @param length Number of characters to process
+ * @param strict Method will throw an exception when it encounter illegal surrogate
+ * character if <code>strict</code> is true, otherwise illegal surrogate character
+ * will be replaced by character \uFFFD.
+ * @return String with all the special symbols escaped
+ * @throws XindiceRuntimeException If <code>strict</code> is true and <code>value</code>
+ * contains illegal surrogate character
+ */
+ public static String escape(char[] value, int offset, int length, boolean strict) {
+ StringBuffer buf = new StringBuffer();
+ int start = offset;
+ int blockLength = 0;
+
+ for (int i = offset; i < length; i++) {
+ String outval = escape(value[i], strict);
+
+ if (outval == null) {
+ if (isLeadingSurrogate(value[i])) {
+ if (i + 1 < length && isTrailingSurrogate(value[i + 1])) {
+ outval = getSurrogateValue(value[i], value[i + 1]);
+ i++;
+ } else {
+ if (strict) {
+ throw new XindiceRuntimeException("Leading surrogate &#" + Integer.toString(value[i]) + ";" +
+ "must be followed by trailing surrogate");
+ } else {
+ outval = REPLACEMENT;
+ }
+ }
+ } else {
+ blockLength++;
+ }
+ }
+
+ if (outval != null) {
+ if (blockLength > 0) {
+ buf.append(value, start, blockLength);
+ }
+ buf.append(outval);
+ start = i + 1;
+ blockLength = 0;
+ }
+ }
+
+ if (blockLength > 0 && start > offset) {
+ buf.append(value, start, blockLength);
+ }
+
+ return buf.length() > 0 ? buf.toString() : new String(value, offset, length);
+ }
+
+ /**
+ * Converts input text into its XML representation by escaping all special symbols,
+ * if any are present.
+ *
+ * @param value Input array
+ * @param offset Start position in the array
+ * @param length Number of characters to process
+ * @return String with all the special symbols escaped
+ */
+ public static String escape(char[] value, int offset, int length) {
+ return escape(value, offset, length, false);
+ }
+
+ /**
+ * Converts input text into its XML representation by escaping all special symbols,
+ * if any are present.
+ *
+ * @param text Input string
+ * @param strict Method will throw an exception when it encounter illegal surrogate
+ * character if <code>strict</code> is true, otherwise illegal surrogate character
+ * will be replaced by character \uFFFD.
+ * @return String with all the special symbols escaped
+ * @throws XindiceRuntimeException If <code>strict</code> is true and <code>text</code>
+ * contains illegal surrogate character
+ */
+ public static String escape(String text, boolean strict) {
+ StringBuffer buf = null;
+ int length = text.length();
+
+ for (int i = 0; i < length; i++) {
+ char ch = text.charAt(i);
+ String outval = escape(ch, strict);
+
+ if (outval == null) {
+ if (isLeadingSurrogate(ch)) {
+ if (i + 1 < length && isTrailingSurrogate(text.charAt(i + 1))) {
+ outval = getSurrogateValue(ch, text.charAt(i + 1));
+
+ if (buf == null) {
+ buf = new StringBuffer(text.substring(0, i));
+ }
+ i++;
+ } else {
+ if (strict) {
+ throw new XindiceRuntimeException("Leading surrogate &#" + Integer.toString(ch) + ";" +
+ "must be followed by trailing surrogate");
+ } else {
+ outval = REPLACEMENT;
+ }
+ }
+ }
+ }
+
+ if (outval != null && buf == null) {
+ buf = new StringBuffer(text.substring(0, i));
+ }
+
+ if (outval != null) {
+ buf.append(outval);
+ } else if (buf != null) {
+ buf.append(ch);
+ }
+ }
+
+ return buf != null ? buf.toString() : text;
+ }
+
+ /**
+ * Converts input text into its XML representation by escaping all special symbols,
+ * if any are present.
+ *
+ * @param text Input string
+ * @return String with all the special symbols escaped
+ */
+ public static String escape(String text) {
+ return escape(text, false);
+ }
+
+ private static String escape(char ch, boolean strict) {
+ String outval = null;
+
+ switch (ch) {
+ case '&':
+ outval = "&";
+ break;
+ case '\'':
+ outval = "'";
+ break;
+ case '\"':
+ outval = """;
+ break;
+ case '<':
+ outval = "<";
+ break;
+ case '>':
+ outval = ">";
+ break;
+ default:
+ if (isTrailingSurrogate(ch)) {
+ if (strict) {
+ throw new XindiceRuntimeException("Trailing surrogate &#" + Integer.toString(ch) +
+ "; must follow leading surrogate");
+ } else {
+ outval = REPLACEMENT;
+ }
+ } else if (!isLeadingSurrogate(ch) && !isLegal(ch)) {
+ outval = "&#" + Integer.toString(ch) + ";";
+ }
+ break;
+ }
+
+ return outval;
+ }
+
+ private static boolean isLegal(char ch) {
+ return ch == 0x9 || ch == 0xA || ch == 0xD ||
+ (ch >= 0x20 && ch <= 0xD7FF) ||
+ (ch >= 0xE000 && ch <= 0xFFFD);
+ }
+
+ /**
+ * Converts UTF-16 surrogate pair to UTF-8
+ * @param high Leading surrogate
+ * @param low Trailing surrogate
+ * @return String with escaped 4-byte value
+ */
+ private static String getSurrogateValue(char high, char low) {
+ int val = (high & 0x3FF) << 10 | (low & 0x3FF) + 0x10000;
+ return "&#" + Integer.toString(val) + ";";
+ }
+
+ private static boolean isLeadingSurrogate(char ch) {
+ return ch >= 0xD800 && ch <= 0xDBFF;
+ }
+
+ private static boolean isTrailingSurrogate(char ch) {
+ return ch >= 0xDC00 && ch <= 0xDFFF;
+ }
+}
Propchange: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java
------------------------------------------------------------------------------
svn:keywords = Id Revision Author Date
Modified: xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java
URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java?rev=635409&r1=635408&r2=635409&view=diff
==============================================================================
--- xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java (original)
+++ xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java Sun Mar 9 17:52:12 2008
@@ -21,7 +21,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
+import org.apache.xindice.util.XMLUtilities;
+import org.apache.xindice.util.XindiceRuntimeException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
@@ -122,14 +123,12 @@
}
private String getQNameAtt(String uri, String localName) throws SAXException {
-
- String prefix = null;
-
if ("".equals(uri)) {
return localName;
}
/* Look for prefix */
+ String prefix = null;
Iterator prefixes = namespaces.keySet().iterator();
while (prefixes.hasNext()) {
String key = (String) prefixes.next();
@@ -149,8 +148,6 @@
private String getQNameElement(String uri, String localName) throws SAXException {
- String prefix = null;
-
if ("".equals(uri)) {
if (namespaces.get("") != null) {
throw new SAXException("default namespace is declared here!");
@@ -161,6 +158,7 @@
}
/* Look for prefix */
+ String prefix = null;
Iterator prefixes = namespaces.keySet().iterator();
while (prefixes.hasNext()) {
String key = (String) prefixes.next();
@@ -189,8 +187,7 @@
* @exception SAXException Description of Exception
* @see org.xml.sax.ContentHandler#startElement
*/
- public void startElement(String uri, String localName,
- String qName, Attributes attributes)
+ public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
newContent.append("<");
@@ -213,7 +210,11 @@
newContent.append(qn);
newContent.append("=");
newContent.append("\"");
- newContent.append(attributes.getValue(i));
+ try {
+ newContent.append(XMLUtilities.escape(attributes.getValue(i), true));
+ } catch (XindiceRuntimeException e) {
+ throw new SAXException(e);
+ }
newContent.append("\"");
// Avoid duplicate namespace declarations
@@ -277,38 +278,11 @@
* @exception SAXException Description of Exception
* @see org.xml.sax.ContentHandler#characters
*/
- public void characters(char ch[], int start, int length)
- throws SAXException {
- int i = 0;
- while (i < length) {
- char c = ch[start + i];
- switch (c) {
- case '&':
- newContent.append("&amp;");
- break;
- case '<':
- newContent.append("&lt;");
- break;
- case '>':
- newContent.append("&gt;");
- break;
- case '"':
- newContent.append("&quot;");
- break;
- case '\'':
- newContent.append("&apos;");
- break;
- default:
- // If we're outside 7 bit ascii encode as a character ref.
- // Not sure what the proper behavior here should be.
- if ((int) c > 127) {
- newContent.append("&#" + (int) c + ";");
- } else {
- newContent.append(c);
- }
- }
-
- i++;
+    public void characters(char ch[], int start, int length) throws SAXException {
+        try {
+            // escape() only returns the escaped text; it has to be appended to the
+            // output buffer here, otherwise the character data is dropped.
+            newContent.append(XMLUtilities.escape(ch, start, length, true));
+        } catch (XindiceRuntimeException e) {
+            throw new SAXException(e);
         }
     }