You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ws.apache.org by ve...@apache.org on 2016/05/14 16:21:14 UTC

svn commit: r1743836 - in /webservices/axiom/trunk/aspects/core-aspects/src: main/java/org/apache/axiom/core/stream/serializer/ main/java/org/apache/axiom/core/stream/serializer/writer/ test/java/org/apache/axiom/core/stream/serializer/

Author: veithen
Date: Sat May 14 16:21:14 2016
New Revision: 1743836

URL: http://svn.apache.org/viewvc?rev=1743836&view=rev
Log:
Delegate conversion of unmappable characters to character references to the XmlWriter. This also ensures that we handle the scenario described in XALANJ-2593 correctly.

Modified:
    webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/ToStream.java
    webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/AbstractXmlWriter.java
    webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/UnmappableCharacterHandler.java
    webservices/axiom/trunk/aspects/core-aspects/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java

Modified: webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/ToStream.java
URL: http://svn.apache.org/viewvc/webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/ToStream.java?rev=1743836&r1=1743835&r2=1743836&view=diff
==============================================================================
--- webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/ToStream.java (original)
+++ webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/ToStream.java Sat May 14 16:21:14 2016
@@ -34,6 +34,7 @@ import javax.xml.transform.OutputKeys;
 import org.apache.axiom.core.stream.StreamException;
 import org.apache.axiom.core.stream.serializer.utils.MsgKey;
 import org.apache.axiom.core.stream.serializer.utils.Utils;
+import org.apache.axiom.core.stream.serializer.writer.UnmappableCharacterHandler;
 import org.apache.axiom.core.stream.serializer.writer.WriterXmlWriter;
 import org.apache.axiom.core.stream.serializer.writer.XmlWriter;
 
@@ -1229,21 +1230,6 @@ abstract public class ToStream extends S
                         writer.write("&#8232;");
                         lastDirtyCharProcessed = i;
                     }
-                    else if (m_encodingInfo.isInEncoding(ch)) {
-                        // If the character is in the encoding, and
-                        // not in the normal ASCII range, we also
-                        // just leave it get added on to the clean characters
-                        
-                    }
-                    else {
-                        // This is a fallback plan, we should never get here
-                        // but if the character wasn't previously handled
-                        // (i.e. isn't in the encoding, etc.) then what
-                        // should we do?  We choose to write out an entity
-                        writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                        writer.writeCharacterReference(ch);
-                        lastDirtyCharProcessed = i;
-                    }
                 }
             }
             
@@ -1527,6 +1513,7 @@ abstract public class ToStream extends S
             m_startNewLine = true;
 
             final XmlWriter writer = m_writer;
+            writer.setUnmappableCharacterHandler(UnmappableCharacterHandler.THROW_EXCEPTION);
             writer.write('<');
             writer.write(name);
         }
@@ -1625,6 +1612,7 @@ abstract public class ToStream extends S
         String string)
         throws IOException
     {
+        writer.setUnmappableCharacterHandler(UnmappableCharacterHandler.CONVERT_TO_CHARACTER_REFERENCE);
         final int len = string.length();
         if (len > m_attrBuff.length)
         {
@@ -1690,22 +1678,12 @@ abstract public class ToStream extends S
                     // LINE SEPARATOR
                     writer.write("&#8232;");
                 }
-                else if (m_encodingInfo.isInEncoding(ch)) {
-                    // If the character is in the encoding, and
-                    // not in the normal ASCII range, we also
-                    // just write it out
-                    writer.write(ch);
-                }
                 else {
-                    // This is a fallback plan, we should never get here
-                    // but if the character wasn't previously handled
-                    // (i.e. isn't in the encoding, etc.) then what
-                    // should we do?  We choose to write out a character ref
-                    writer.writeCharacterReference(ch);
+                    writer.write(ch);
                 }
-                    
             }
         }
+        writer.setUnmappableCharacterHandler(UnmappableCharacterHandler.THROW_EXCEPTION);
     }
 
     /**
@@ -1747,10 +1725,12 @@ abstract public class ToStream extends S
                 if (m_cdataTagOpen)
                     closeCDATA();
 
+                writer.setUnmappableCharacterHandler(UnmappableCharacterHandler.THROW_EXCEPTION);
                 writer.write('<');
                 writer.write('/');
                 writer.write(name);
                 writer.write('>');
+                writer.setUnmappableCharacterHandler(UnmappableCharacterHandler.CONVERT_TO_CHARACTER_REFERENCE);
             }
         }
         catch (IOException e)
@@ -1942,6 +1922,7 @@ abstract public class ToStream extends S
             try
             {
                 m_writer.write('>');
+                m_writer.setUnmappableCharacterHandler(UnmappableCharacterHandler.CONVERT_TO_CHARACTER_REFERENCE);
             }
             catch (IOException e)
             {

Modified: webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/AbstractXmlWriter.java
URL: http://svn.apache.org/viewvc/webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/AbstractXmlWriter.java?rev=1743836&r1=1743835&r2=1743836&view=diff
==============================================================================
--- webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/AbstractXmlWriter.java (original)
+++ webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/AbstractXmlWriter.java Sat May 14 16:21:14 2016
@@ -44,11 +44,17 @@ abstract class AbstractXmlWriter extends
     public final void write(char c) throws IOException {
         if (highSurrogate != 0) {
             if (Character.isLowSurrogate(c)) {
-                writeCharacter(Character.toCodePoint(highSurrogate, c));
+                int codePoint = Character.toCodePoint(highSurrogate, c);
+                // Need to reset highSurrogate before writing because the character
+                // may be unmappable, resulting in a character reference being written
+                // (which means that this method must be reentrant).
                 highSurrogate = 0;
+                writeCharacter(codePoint);
             } else {
                 throw new IOException("Invalid surrogate pair");
             }
+        } else if (Character.isHighSurrogate(c)) {
+            highSurrogate = c;
         } else if (Character.isLowSurrogate(c)) {
             throw new IOException("Invalid surrogate pair");
         } else {

Modified: webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/UnmappableCharacterHandler.java
URL: http://svn.apache.org/viewvc/webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/UnmappableCharacterHandler.java?rev=1743836&r1=1743835&r2=1743836&view=diff
==============================================================================
--- webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/UnmappableCharacterHandler.java (original)
+++ webservices/axiom/trunk/aspects/core-aspects/src/main/java/org/apache/axiom/core/stream/serializer/writer/UnmappableCharacterHandler.java Sat May 14 16:21:14 2016
@@ -20,20 +20,20 @@ package org.apache.axiom.core.stream.ser
 
 import java.io.IOException;
 
-abstract class UnmappableCharacterHandler {
-    static final UnmappableCharacterHandler THROW_EXCEPTION = new UnmappableCharacterHandler() {
+public abstract class UnmappableCharacterHandler {
+    public static final UnmappableCharacterHandler THROW_EXCEPTION = new UnmappableCharacterHandler() {
         @Override
-        void processUnmappableCharacter(int codePoint, XmlWriter writer) throws IOException {
+        public void processUnmappableCharacter(int codePoint, XmlWriter writer) throws IOException {
             throw new IOException("Unmappable character (code point " + codePoint + ")");
         }
     };
 
-    static final UnmappableCharacterHandler CONVERT_TO_CHARACTER_REFERENCE = new UnmappableCharacterHandler() {
+    public static final UnmappableCharacterHandler CONVERT_TO_CHARACTER_REFERENCE = new UnmappableCharacterHandler() {
         @Override
-        void processUnmappableCharacter(int codePoint, XmlWriter writer) throws IOException {
+        public void processUnmappableCharacter(int codePoint, XmlWriter writer) throws IOException {
             writer.writeCharacterReference(codePoint);
         }
     };
 
-    abstract void processUnmappableCharacter(int codePoint, XmlWriter writer) throws IOException;
+    public abstract void processUnmappableCharacter(int codePoint, XmlWriter writer) throws IOException;
 }

Modified: webservices/axiom/trunk/aspects/core-aspects/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java
URL: http://svn.apache.org/viewvc/webservices/axiom/trunk/aspects/core-aspects/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java?rev=1743836&r1=1743835&r2=1743836&view=diff
==============================================================================
--- webservices/axiom/trunk/aspects/core-aspects/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java (original)
+++ webservices/axiom/trunk/aspects/core-aspects/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java Sat May 14 16:21:14 2016
@@ -67,6 +67,23 @@ public class SerializerTest {
         assertThat(new String(baos.toByteArray(), "ascii")).isEqualTo("<test attr=\"n&#233;ant\"/>");
     }
 
+    /**
+     * Tests the scenario described in XALANJ-2593.
+     * 
+     * @throws Exception
+     */
+    @Test
+    public void testUnmappableSurrogatePairInAttributeValue() throws Exception {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        SerializerXmlHandler handler = new SerializerXmlHandler(baos, "ascii");
+        handler.startElement("", "x", "");
+        handler.processAttribute("", "y", "", "\uD84C\uDFB4 - \uD841\uDE28", "CDATA", true);
+        handler.attributesCompleted();
+        handler.endElement();
+        handler.completed();
+        assertThat(new String(baos.toByteArray(), "ascii")).isEqualTo("<x y=\"&#144308; - &#132648;\"/>");
+    }
+
     @Test(expected=StreamException.class)
     public void testUnmappableCharacterInComment() throws Exception {
         SerializerXmlHandler handler = new SerializerXmlHandler(new NullOutputStream(), "iso-8859-1");