You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2022/06/21 18:00:18 UTC

[commons-io] branch master updated: Add XmlStreamWriter(OutputStream, Charset)

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a257182 Add XmlStreamWriter(OutputStream, Charset)
9a257182 is described below

commit 9a2571827af7fc47565a716fada2807d20d4524d
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Tue Jun 21 14:00:11 2022 -0400

    Add XmlStreamWriter(OutputStream, Charset)
---
 src/changes/changes.xml                            |  3 ++
 .../apache/commons/io/output/XmlStreamWriter.java  | 58 ++++++++++++++--------
 .../commons/io/output/XmlStreamWriterTest.java     | 37 ++++++++------
 3 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index f0675cd9..ae41dec8 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -397,6 +397,9 @@ The <action> type attribute can be add,update,fix,remove.
         Add Charsets.toCharset(Charset, Charset).
         Add Charsets.toCharset(String, Charset).
       </action>
+      <action dev="ggregory" type="add" due-to="Gary Gregory">
+        Add XmlStreamWriter(OutputStream, Charset).
+      </action>
       <!-- UPDATE -->
       <action dev="kinow" type="update" due-to="Dependabot, Gary Gregory">
         Bump actions/cache from 2.1.6 to 3.0.4 #307, #337.
diff --git a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
index 4b3ba09e..37233fff 100644
--- a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
+++ b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
@@ -24,9 +24,13 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.StringWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.Locale;
+import java.util.Objects;
 import java.util.regex.Matcher;
 
+import org.apache.commons.io.Charsets;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.XmlStreamReader;
 
@@ -43,13 +47,13 @@ public class XmlStreamWriter extends Writer {
 
     private final OutputStream out;
 
-    private final String defaultEncoding;
+    private final Charset defaultCharset;
 
-    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
+    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
 
     private Writer writer;
 
-    private String encoding;
+    private Charset charset;
 
     /**
      * Constructs a new XML stream writer for the specified file
@@ -84,7 +88,7 @@ public class XmlStreamWriter extends Writer {
      * @param out The output stream
      */
     public XmlStreamWriter(final OutputStream out) {
-        this(out, null);
+        this(out, StandardCharsets.UTF_8);
     }
 
     /**
@@ -95,8 +99,20 @@ public class XmlStreamWriter extends Writer {
      * @param defaultEncoding The default encoding if not encoding could be detected
      */
     public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
+        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
+    }
+
+    /**
+     * Constructs a new XML stream writer for the specified output stream
+     * with the specified default encoding.
+     *
+     * @param out The output stream
+     * @param defaultEncoding The default encoding if no encoding could be detected
+     * @since 2.12.0
+     */
+    public XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
         this.out = out;
-        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
+        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
     }
 
     /**
@@ -107,9 +123,9 @@ public class XmlStreamWriter extends Writer {
     @Override
     public void close() throws IOException {
         if (writer == null) {
-            encoding = defaultEncoding;
-            writer = new OutputStreamWriter(out, encoding);
-            writer.write(xmlPrologWriter.toString());
+            charset = defaultCharset;
+            writer = new OutputStreamWriter(out, charset);
+            writer.write(prologWriter.toString());
         }
         writer.close();
     }
@@ -125,11 +141,11 @@ public class XmlStreamWriter extends Writer {
     private void detectEncoding(final char[] cbuf, final int off, final int len)
             throws IOException {
         int size = len;
-        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
+        final StringBuffer xmlProlog = prologWriter.getBuffer();
         if (xmlProlog.length() + len > BUFFER_SIZE) {
             size = BUFFER_SIZE - xmlProlog.length();
         }
-        xmlPrologWriter.write(cbuf, off, size);
+        prologWriter.write(cbuf, off, size);
 
         // try to determine encoding
         if (xmlProlog.length() >= 5) {
@@ -141,26 +157,26 @@ public class XmlStreamWriter extends Writer {
                     final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
                             xmlPrologEnd));
                     if (m.find()) {
-                        encoding = m.group(1).toUpperCase(Locale.ROOT);
-                        encoding = encoding.substring(1, encoding.length() - 1);
+                        final String encName = m.group(1).toUpperCase(Locale.ROOT);
+                        charset = Charset.forName(encName.substring(1, encName.length() - 1));
                     } else {
                         // no encoding found in XML prolog: using default
                         // encoding
-                        encoding = defaultEncoding;
+                        charset = defaultCharset;
                     }
                 } else if (xmlProlog.length() >= BUFFER_SIZE) {
                     // no encoding found in first characters: using default
                     // encoding
-                    encoding = defaultEncoding;
+                    charset = defaultCharset;
                 }
             } else {
                 // no XML prolog: using default encoding
-                encoding = defaultEncoding;
+                charset = defaultCharset;
             }
-            if (encoding != null) {
+            if (charset != null) {
                 // encoding has been chosen: let's do it
-                xmlPrologWriter = null;
-                writer = new OutputStreamWriter(out, encoding);
+                prologWriter = null;
+                writer = new OutputStreamWriter(out, charset);
                 writer.write(xmlProlog.toString());
                 if (len > size) {
                     writer.write(cbuf, off + size, len - size);
@@ -187,7 +203,7 @@ public class XmlStreamWriter extends Writer {
      * @return the default encoding
      */
     public String getDefaultEncoding() {
-        return defaultEncoding;
+        return defaultCharset.name();
     }
 
     /**
@@ -196,7 +212,7 @@ public class XmlStreamWriter extends Writer {
      * @return the detected encoding
      */
     public String getEncoding() {
-        return encoding;
+        return charset.name();
     }
 
     /**
@@ -209,7 +225,7 @@ public class XmlStreamWriter extends Writer {
      */
     @Override
     public void write(final char[] cbuf, final int off, final int len) throws IOException {
-        if (xmlPrologWriter != null) {
+        if (prologWriter != null) {
             detectEncoding(cbuf, off, len);
         } else {
             writer.write(cbuf, off, len);
diff --git a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
index 3526b516..a32c02d7 100644
--- a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
+++ b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
@@ -16,11 +16,14 @@
  */
 package org.apache.commons.io.output;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 
 import org.junit.jupiter.api.Test;
 import org.junitpioneer.jupiter.DefaultLocale;
@@ -28,28 +31,35 @@ import org.junitpioneer.jupiter.DefaultLocale;
 /**
  */
 public class XmlStreamWriterTest {
-    /** french */
+
+    /** French */
     private static final String TEXT_LATIN1 = "eacute: \u00E9";
-    /** greek */
+
+    /** Greek */
     private static final String TEXT_LATIN7 = "alpha: \u03B1";
-    /** euro support */
+
+    /** Euro support */
     private static final String TEXT_LATIN15 = "euro: \u20AC";
-    /** japanese */
+
+    /** Japanese */
     private static final String TEXT_EUC_JP = "hiragana A: \u3042";
+
     /** Unicode: support everything */
     private static final String TEXT_UNICODE = TEXT_LATIN1 + ", " + TEXT_LATIN7
             + ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP;
 
-    private static void checkXmlContent(final String xml, final String encoding, final String defaultEncoding)
-            throws IOException {
+    private static void checkXmlContent(final String xml, final String encodingName, final String defaultEncodingName)
+        throws IOException {
         final ByteArrayOutputStream out = new ByteArrayOutputStream();
-        final XmlStreamWriter writer = new XmlStreamWriter(out, defaultEncoding);
+        final XmlStreamWriter writer = new XmlStreamWriter(out, defaultEncodingName);
         writer.write(xml);
         writer.close();
         final byte[] xmlContent = out.toByteArray();
-        assertTrue(encoding.equalsIgnoreCase(writer.getEncoding()));
-        assertArrayEquals(xml.getBytes(encoding), xmlContent);
-
+        final Charset charset = Charset.forName(encodingName);
+        final Charset writerCharset = Charset.forName(writer.getEncoding());
+        assertEquals(charset, writerCharset);
+        assertTrue(writerCharset.contains(charset), writerCharset.name());
+        assertArrayEquals(xml.getBytes(encodingName), xmlContent);
     }
 
     private static void checkXmlWriter(final String text, final String encoding)
@@ -62,7 +72,7 @@ public class XmlStreamWriterTest {
         final String xml = createXmlContent(text, encoding);
         String effectiveEncoding = encoding;
         if (effectiveEncoding == null) {
-            effectiveEncoding = defaultEncoding == null ? "UTF-8" : defaultEncoding;
+            effectiveEncoding = defaultEncoding == null ? StandardCharsets.UTF_8.name() : defaultEncoding;
         }
         checkXmlContent(xml, effectiveEncoding, defaultEncoding);
     }
@@ -121,7 +131,7 @@ public class XmlStreamWriterTest {
         checkXmlWriter(TEXT_LATIN7, "ISO-8859-7");
     }
 
-    // Turkish language has specific rules to convert dotted and dottless i character.
+    /** Turkish language has specific rules to convert dotted and dotless i characters. */
     @Test
     @DefaultLocale(language = "tr")
     public void testLowerCaseEncodingWithTurkishLocale_IO_557() throws IOException {
@@ -132,8 +142,7 @@ public class XmlStreamWriterTest {
 
     @Test
     public void testNoXmlHeader() throws IOException {
-        final String xml = "<text>text with no XML header</text>";
-        checkXmlContent(xml, "UTF-8", null);
+        checkXmlContent("<text>text with no XML header</text>", "UTF-8", null);
     }
 
     @Test