You are viewing a plain text version of this content. The canonical link for it is here.
Posted to xindice-dev@xml.apache.org by na...@apache.org on 2008/03/10 01:52:17 UTC

svn commit: r635409 - in /xml/xindice/trunk/java/src/org/apache/xindice: util/XMLUtilities.java xml/sax/SetContentHandler.java

Author: natalia
Date: Sun Mar  9 17:52:12 2008
New Revision: 635409

URL: http://svn.apache.org/viewvc?rev=635409&view=rev
Log:
Fix for XML escaping

Added:
    xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java   (with props)
Modified:
    xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java

Added: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java
URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java?rev=635409&view=auto
==============================================================================
--- xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java (added)
+++ xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java Sun Mar  9 17:52:12 2008
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * $Id$
+ */
+
+package org.apache.xindice.util;
+
+/**
+ * Set of XML-related utilities.
+ *
+ * @version $Revision$, $Date$
+ */
+public class XMLUtilities {
+    /** Substitute for an unpaired surrogate in non-strict mode (U+FFFD). */
+    private static final String REPLACEMENT = "\uFFFD";
+
+    /**
+     * Converts input text into its XML representation by escaping all special symbols,
+     * if any are present.
+     *
+     * @param value Input array
+     * @param offset Start position in the array
+     * @param length Number of characters to process, counted from <code>offset</code>
+     * @param strict Method will throw an exception when it encounters illegal surrogate
+     * character if <code>strict</code> is true, otherwise illegal surrogate character
+     * will be replaced by character \uFFFD.
+     * @return String with all the special symbols escaped
+     * @throws XindiceRuntimeException If <code>strict</code> is true and <code>value</code>
+     * contains illegal surrogate character
+     */
+    public static String escape(char[] value, int offset, int length, boolean strict) {
+        // Scan [offset, offset + length); buf is created on the first character needing an escape.
+        final int end = offset + length;
+        StringBuffer buf = null;
+        int start = offset; // first character not yet copied to buf
+
+        for (int i = offset; i < end; i++) {
+            String outval = escape(value[i], strict);
+            int next = i + 1;
+
+            if (outval == null && isLeadingSurrogate(value[i])) {
+                if (next < end && isTrailingSurrogate(value[next])) {
+                    outval = getSurrogateValue(value[i], value[next]);
+                    next++; // the pair is consumed as a whole
+                } else if (strict) {
+                    throw new XindiceRuntimeException(unpairedLeading(value[i]));
+                } else {
+                    outval = REPLACEMENT;
+                }
+            }
+
+            if (outval != null) {
+                if (buf == null) {
+                    buf = new StringBuffer(length + 16);
+                }
+                buf.append(value, start, i - start);
+                buf.append(outval);
+                start = next;
+                i = next - 1;
+            }
+        }
+
+        return buf == null ? new String(value, offset, length)
+                           : buf.append(value, start, end - start).toString();
+    }
+
+    /**
+     * Converts input text into its XML representation by escaping all special symbols,
+     * if any are present. Illegal surrogate characters are replaced by character \uFFFD.
+     *
+     * @param value Input array
+     * @param offset Start position in the array
+     * @param length Number of characters to process, counted from <code>offset</code>
+     * @return String with all the special symbols escaped
+     */
+    public static String escape(char[] value, int offset, int length) {
+        return escape(value, offset, length, false);
+    }
+
+    /**
+     * Converts input text into its XML representation by escaping all special symbols,
+     * if any are present.
+     *
+     * @param text Input string
+     * @param strict Method will throw an exception when it encounters illegal surrogate
+     * character if <code>strict</code> is true, otherwise illegal surrogate character
+     * will be replaced by character \uFFFD.
+     * @return String with all the special symbols escaped; <code>text</code> itself
+     * if nothing had to be escaped
+     * @throws XindiceRuntimeException If <code>strict</code> is true and <code>text</code>
+     * contains illegal surrogate character
+     */
+    public static String escape(String text, boolean strict) {
+        final int length = text.length();
+        StringBuffer buf = null;
+        int start = 0; // first character not yet copied to buf
+
+        for (int i = 0; i < length; i++) {
+            char ch = text.charAt(i);
+            String outval = escape(ch, strict);
+            int next = i + 1;
+
+            if (outval == null && isLeadingSurrogate(ch)) {
+                if (next < length && isTrailingSurrogate(text.charAt(next))) {
+                    outval = getSurrogateValue(ch, text.charAt(next));
+                    next++; // the pair is consumed as a whole
+                } else if (strict) {
+                    throw new XindiceRuntimeException(unpairedLeading(ch));
+                } else {
+                    outval = REPLACEMENT;
+                }
+            }
+
+            if (outval != null) {
+                if (buf == null) {
+                    buf = new StringBuffer(length + 16);
+                }
+                buf.append(text.substring(start, i));
+                buf.append(outval);
+                start = next;
+                i = next - 1;
+            }
+        }
+
+        return buf == null ? text : buf.append(text.substring(start)).toString();
+    }
+
+    /**
+     * Converts input text into its XML representation by escaping all special symbols,
+     * if any are present. Illegal surrogate characters are replaced by character \uFFFD.
+     *
+     * @param text Input string
+     * @return String with all the special symbols escaped
+     */
+    public static String escape(String text) {
+        return escape(text, false);
+    }
+
+    /**
+     * Escapes a single character. Returns <code>null</code> if the character can be copied
+     * as is, or is a leading surrogate that the caller has to pair up with its successor.
+     */
+    private static String escape(char ch, boolean strict) {
+        String outval = null;
+
+        switch (ch) {
+            case '&':
+                outval = "&amp;";
+                break;
+            case '\'':
+                outval = "&apos;";
+                break;
+            case '\"':
+                outval = "&quot;";
+                break;
+            case '<':
+                outval = "&lt;";
+                break;
+            case '>':
+                outval = "&gt;";
+                break;
+            default:
+                if (isTrailingSurrogate(ch)) {
+                    // Callers skip the trailing half of a valid pair, so one seen here is stray.
+                    if (strict) {
+                        throw new XindiceRuntimeException("Trailing surrogate &#" + Integer.toString(ch) +
+                                                          "; must follow leading surrogate");
+                    } else {
+                        outval = REPLACEMENT;
+                    }
+                } else if (!isLeadingSurrogate(ch) && !isLegal(ch)) {
+                    outval = "&#" + Integer.toString(ch) + ";";
+                }
+                break;
+        }
+
+        return outval;
+    }
+
+    /** Builds the error message for a leading surrogate without a trailing one. */
+    private static String unpairedLeading(char ch) {
+        return "Leading surrogate &#" + Integer.toString(ch) + "; must be followed by trailing surrogate";
+    }
+
+    /** Checks the character against the XML 1.0 Char ranges of the Basic Multilingual Plane. */
+    private static boolean isLegal(char ch) {
+        return ch == 0x9 || ch == 0xA || ch == 0xD ||
+               (ch >= 0x20 && ch <= 0xD7FF) ||
+               (ch >= 0xE000 && ch <= 0xFFFD);
+    }
+
+    /**
+     * Converts UTF-16 surrogate pair to a numeric character reference
+     * @param high Leading surrogate
+     * @param low  Trailing surrogate
+     * @return Decimal character reference of the code point the pair encodes
+     */
+    private static String getSurrogateValue(char high, char low) {
+        // '+' binds tighter than '|': combine both 10-bit halves before adding the base.
+        int val = (((high & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000;
+        return "&#" + Integer.toString(val) + ";";
+    }
+
+    private static boolean isLeadingSurrogate(char ch) {
+        return ch >= 0xD800 && ch <= 0xDBFF;
+    }
+
+    private static boolean isTrailingSurrogate(char ch) {
+        return ch >= 0xDC00 && ch <= 0xDFFF;
+    }
+}

Propchange: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java
------------------------------------------------------------------------------
    svn:keywords = Id Revision Author Date

Modified: xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java
URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java?rev=635409&r1=635408&r2=635409&view=diff
==============================================================================
--- xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java (original)
+++ xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java Sun Mar  9 17:52:12 2008
@@ -21,7 +21,8 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
+import org.apache.xindice.util.XMLUtilities;
+import org.apache.xindice.util.XindiceRuntimeException;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
@@ -122,14 +123,12 @@
     }
 
     private String getQNameAtt(String uri, String localName) throws SAXException {
-
-        String prefix = null;
-
         if ("".equals(uri)) {
             return localName;
         }
 
         /* Look for prefix */
+        String prefix = null;
         Iterator prefixes = namespaces.keySet().iterator();
         while (prefixes.hasNext()) {
             String key = (String) prefixes.next();
@@ -149,8 +148,6 @@
 
 
     private String getQNameElement(String uri, String localName) throws SAXException {
-        String prefix = null;
-
         if ("".equals(uri)) {
             if (namespaces.get("") != null) {
                 throw new SAXException("default namespace is declared here!");
@@ -161,6 +158,7 @@
         }
 
         /* Look for prefix */
+        String prefix = null;
         Iterator prefixes = namespaces.keySet().iterator();
         while (prefixes.hasNext()) {
             String key = (String) prefixes.next();
@@ -189,8 +187,7 @@
      * @exception SAXException Description of Exception
      * @see org.xml.sax.ContentHandler#startElement
      */
-    public void startElement(String uri, String localName,
-                             String qName, Attributes attributes)
+    public void startElement(String uri, String localName, String qName, Attributes attributes)
             throws SAXException {
 
         newContent.append("<");
@@ -213,7 +210,11 @@
             newContent.append(qn);
             newContent.append("=");
             newContent.append("\"");
-            newContent.append(attributes.getValue(i));
+            try {
+                newContent.append(XMLUtilities.escape(attributes.getValue(i), true));
+            } catch (XindiceRuntimeException e) {
+                throw new SAXException(e);
+            }
             newContent.append("\"");
 
             // Avoid duplicate namespace declarations
@@ -277,38 +278,11 @@
      * @exception SAXException Description of Exception
      * @see org.xml.sax.ContentHandler#characters
      */
-    public void characters(char ch[], int start, int length)
-            throws SAXException {
-        int i = 0;
-        while (i < length) {
-            char c = ch[start + i];
-            switch (c) {
-                case '&':
-                    newContent.append("&amp;");
-                    break;
-                case '<':
-                    newContent.append("&lt;");
-                    break;
-                case '>':
-                    newContent.append("&gt;");
-                    break;
-                case '"':
-                    newContent.append("&quot;");
-                    break;
-                case '\'':
-                    newContent.append("&apos;");
-                    break;
-                default:
-                    // If we're outside 7 bit ascii encode as a character ref.
-                    // Not sure what the proper behavior here should be.
-                    if ((int) c > 127) {
-                        newContent.append("&#" + (int) c + ";");
-                    } else {
-                        newContent.append(c);
-                    }
-            }
-
-            i++;
+    public void characters(char ch[], int start, int length) throws SAXException {
+        try {
+            newContent.append(XMLUtilities.escape(ch, start, length, true)); // escape() only returns the text
+        } catch (XindiceRuntimeException e) {
+            throw new SAXException(e);
         }
     }