You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ws.apache.org by ve...@apache.org on 2022/11/08 23:28:25 UTC

[ws-axiom] branch master updated: [AXIOM-516] Make the serializer use hex numbers in character references

This is an automated email from the ASF dual-hosted git repository.

veithen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ws-axiom.git


The following commit(s) were added to refs/heads/master by this push:
     new 2aa7e79d0 [AXIOM-516] Make the serializer use hex numbers in character references
2aa7e79d0 is described below

commit 2aa7e79d0138cab1b895d08848eec4f5ce309d7d
Author: Andreas Veithen <an...@gmail.com>
AuthorDate: Tue Nov 8 23:28:17 2022 +0000

    [AXIOM-516] Make the serializer use hex numbers in character references
    
    Woodstox's XMLStreamWriter implementation generates character references
    using hexadecimal numbers. However, Axiom now uses its own serializer,
    which generates character references with decimal numbers. Decimal
    references are perfectly valid XML, but the change causes problems for
    broken clients that expect the hexadecimal form.
---
 .../org/apache/axiom/core/stream/serializer/Serializer.java    |  8 ++++----
 .../apache/axiom/core/stream/serializer/writer/XmlWriter.java  | 10 +++++++---
 .../apache/axiom/core/stream/serializer/SerializerTest.java    |  6 +++---
 .../stream/serializer/writer/OutputStreamXmlWriterTest.java    |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/Serializer.java b/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/Serializer.java
index ab8754895..6beb3bfcd 100644
--- a/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/Serializer.java
+++ b/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/Serializer.java
@@ -386,16 +386,16 @@ public final class Serializer implements XmlHandler, CharacterDataSink {
                         switch (ch) {
                             case 0x09:
                                 if (context == ATTRIBUTE_VALUE) {
-                                    replacement = "&#9;";
+                                    replacement = "&#x9;";
                                 }
                                 break;
                             case 0x0A:
                                 if (context == ATTRIBUTE_VALUE) {
-                                    replacement = "&#10;";
+                                    replacement = "&#xa;";
                                 }
                                 break;
                             case 0x0D:
-                                replacement = "&#13;";
+                                replacement = "&#xd;";
                                 // Leave whitespace carriage return as a real character
                                 break;
                             default:
@@ -426,7 +426,7 @@ public final class Serializer implements XmlHandler, CharacterDataSink {
                         generateCharacterReference = true;
                     } else if (ch == 0x2028) {
                         // LINE SEPARATOR
-                        replacement = "&#8232;";
+                        replacement = "&#x2028;";
                     }
 
                     if (ch == ']') {
diff --git a/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/writer/XmlWriter.java b/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/writer/XmlWriter.java
index 0904b053b..1dcace5c2 100644
--- a/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/writer/XmlWriter.java
+++ b/components/core-streams/src/main/java/org/apache/axiom/core/stream/serializer/writer/XmlWriter.java
@@ -58,9 +58,13 @@ public abstract class XmlWriter {
     public abstract void flushBuffer() throws IOException;
 
     public final void writeCharacterReference(int codePoint) throws IOException {
-        write("&#");
-        // TODO: optimize this
-        write(Integer.toString(codePoint));
+        write("&#x");
+        int digits =
+                Math.max(((Integer.SIZE - Integer.numberOfLeadingZeros(codePoint)) + 3) / 4, 1);
+        for (int i = digits - 1; i >= 0; i--) {
+            int digit = (codePoint >> (4 * i)) & 0xf;
+            write((char) (digit + (digit < 10 ? '0' : 'a' - 10)));
+        }
         write(';');
     }
 }
diff --git a/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java b/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java
index 884ea49f3..60a1198c8 100644
--- a/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java
+++ b/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/SerializerTest.java
@@ -75,7 +75,7 @@ public class SerializerTest {
         handler.endElement();
         handler.completed();
         assertThat(new String(baos.toByteArray(), "iso-8859-15"))
-                .isEqualTo("<test>a&#931;\u20AC</test>");
+                .isEqualTo("<test>a&#x3a3;\u20AC</test>");
     }
 
     @Test
@@ -89,7 +89,7 @@ public class SerializerTest {
         handler.endElement();
         handler.completed();
         assertThat(new String(baos.toByteArray(), "ascii"))
-                .isEqualTo("<test attr=\"n&#233;ant\"/>");
+                .isEqualTo("<test attr=\"n&#xe9;ant\"/>");
     }
 
     /**
@@ -108,7 +108,7 @@ public class SerializerTest {
         handler.endElement();
         handler.completed();
         assertThat(new String(baos.toByteArray(), "ascii"))
-                .isEqualTo("<x y=\"&#144308; - &#132648;\"/>");
+                .isEqualTo("<x y=\"&#x233b4; - &#x20628;\"/>");
     }
 
     @Test(expected = StreamException.class)
diff --git a/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/writer/OutputStreamXmlWriterTest.java b/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/writer/OutputStreamXmlWriterTest.java
index df1ca8cb7..1291b281c 100644
--- a/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/writer/OutputStreamXmlWriterTest.java
+++ b/components/core-streams/src/test/java/org/apache/axiom/core/stream/serializer/writer/OutputStreamXmlWriterTest.java
@@ -34,6 +34,6 @@ public class OutputStreamXmlWriterTest {
                 UnmappableCharacterHandler.CONVERT_TO_CHARACTER_REFERENCE);
         writer.write("abc\u20ACdef");
         writer.flushBuffer();
-        assertThat(baos.toString("iso-8859-1")).isEqualTo("abc&#8364;def");
+        assertThat(baos.toString("iso-8859-1")).isEqualTo("abc&#x20ac;def");
     }
 }