You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2022/06/21 18:00:18 UTC
[commons-io] branch master updated: Add XmlStreamWriter(OutputStream, Charset)
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git
The following commit(s) were added to refs/heads/master by this push:
new 9a257182 Add XmlStreamWriter(OutputStream, Charset)
9a257182 is described below
commit 9a2571827af7fc47565a716fada2807d20d4524d
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Tue Jun 21 14:00:11 2022 -0400
Add XmlStreamWriter(OutputStream, Charset)
---
src/changes/changes.xml | 3 ++
.../apache/commons/io/output/XmlStreamWriter.java | 58 ++++++++++++++--------
.../commons/io/output/XmlStreamWriterTest.java | 37 ++++++++------
3 files changed, 63 insertions(+), 35 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index f0675cd9..ae41dec8 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -397,6 +397,9 @@ The <action> type attribute can be add,update,fix,remove.
Add Charsets.toCharset(Charset, Charset).
Add Charsets.toCharset(String, Charset).
</action>
+ <action dev="ggregory" type="add" due-to="Gary Gregory">
+ Add XmlStreamWriter(OutputStream, Charset).
+ </action>
<!-- UPDATE -->
<action dev="kinow" type="update" due-to="Dependabot, Gary Gregory">
Bump actions/cache from 2.1.6 to 3.0.4 #307, #337.
diff --git a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
index 4b3ba09e..37233fff 100644
--- a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
+++ b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
@@ -24,9 +24,13 @@ import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Locale;
+import java.util.Objects;
import java.util.regex.Matcher;
+import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.XmlStreamReader;
@@ -43,13 +47,13 @@ public class XmlStreamWriter extends Writer {
private final OutputStream out;
- private final String defaultEncoding;
+ private final Charset defaultCharset;
- private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
+ private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
private Writer writer;
- private String encoding;
+ private Charset charset;
/**
* Constructs a new XML stream writer for the specified file
@@ -84,7 +88,7 @@ public class XmlStreamWriter extends Writer {
* @param out The output stream
*/
public XmlStreamWriter(final OutputStream out) {
- this(out, null);
+ this(out, StandardCharsets.UTF_8);
}
/**
@@ -95,8 +99,20 @@ public class XmlStreamWriter extends Writer {
* @param defaultEncoding The default encoding if not encoding could be detected
*/
public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
+ this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
+ }
+
+ /**
+ * Constructs a new XML stream writer for the specified output stream
+ * with the specified default encoding.
+ *
+ * @param out The output stream
+ * @param defaultEncoding The default encoding if no encoding could be detected
+ * @since 2.12.0
+ */
+ public XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
this.out = out;
- this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
+ this.defaultCharset = Objects.requireNonNull(defaultEncoding);
}
/**
@@ -107,9 +123,9 @@ public class XmlStreamWriter extends Writer {
@Override
public void close() throws IOException {
if (writer == null) {
- encoding = defaultEncoding;
- writer = new OutputStreamWriter(out, encoding);
- writer.write(xmlPrologWriter.toString());
+ charset = defaultCharset;
+ writer = new OutputStreamWriter(out, charset);
+ writer.write(prologWriter.toString());
}
writer.close();
}
@@ -125,11 +141,11 @@ public class XmlStreamWriter extends Writer {
private void detectEncoding(final char[] cbuf, final int off, final int len)
throws IOException {
int size = len;
- final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
+ final StringBuffer xmlProlog = prologWriter.getBuffer();
if (xmlProlog.length() + len > BUFFER_SIZE) {
size = BUFFER_SIZE - xmlProlog.length();
}
- xmlPrologWriter.write(cbuf, off, size);
+ prologWriter.write(cbuf, off, size);
// try to determine encoding
if (xmlProlog.length() >= 5) {
@@ -141,26 +157,26 @@ public class XmlStreamWriter extends Writer {
final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
xmlPrologEnd));
if (m.find()) {
- encoding = m.group(1).toUpperCase(Locale.ROOT);
- encoding = encoding.substring(1, encoding.length() - 1);
+ final String encName = m.group(1).toUpperCase(Locale.ROOT);
+ charset = Charset.forName(encName.substring(1, encName.length() - 1));
} else {
// no encoding found in XML prolog: using default
// encoding
- encoding = defaultEncoding;
+ charset = defaultCharset;
}
} else if (xmlProlog.length() >= BUFFER_SIZE) {
// no encoding found in first characters: using default
// encoding
- encoding = defaultEncoding;
+ charset = defaultCharset;
}
} else {
// no XML prolog: using default encoding
- encoding = defaultEncoding;
+ charset = defaultCharset;
}
- if (encoding != null) {
+ if (charset != null) {
// encoding has been chosen: let's do it
- xmlPrologWriter = null;
- writer = new OutputStreamWriter(out, encoding);
+ prologWriter = null;
+ writer = new OutputStreamWriter(out, charset);
writer.write(xmlProlog.toString());
if (len > size) {
writer.write(cbuf, off + size, len - size);
@@ -187,7 +203,7 @@ public class XmlStreamWriter extends Writer {
* @return the default encoding
*/
public String getDefaultEncoding() {
- return defaultEncoding;
+ return defaultCharset.name();
}
/**
@@ -196,7 +212,7 @@ public class XmlStreamWriter extends Writer {
* @return the detected encoding
*/
public String getEncoding() {
- return encoding;
+ return charset.name();
}
/**
@@ -209,7 +225,7 @@ public class XmlStreamWriter extends Writer {
*/
@Override
public void write(final char[] cbuf, final int off, final int len) throws IOException {
- if (xmlPrologWriter != null) {
+ if (prologWriter != null) {
detectEncoding(cbuf, off, len);
} else {
writer.write(cbuf, off, len);
diff --git a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
index 3526b516..a32c02d7 100644
--- a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
+++ b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
@@ -16,11 +16,14 @@
*/
package org.apache.commons.io.output;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import org.junit.jupiter.api.Test;
import org.junitpioneer.jupiter.DefaultLocale;
@@ -28,28 +31,35 @@ import org.junitpioneer.jupiter.DefaultLocale;
/**
*/
public class XmlStreamWriterTest {
- /** french */
+
+ /** French */
private static final String TEXT_LATIN1 = "eacute: \u00E9";
- /** greek */
+
+ /** Greek */
private static final String TEXT_LATIN7 = "alpha: \u03B1";
- /** euro support */
+
+ /** Euro support */
private static final String TEXT_LATIN15 = "euro: \u20AC";
- /** japanese */
+
+ /** Japanese */
private static final String TEXT_EUC_JP = "hiragana A: \u3042";
+
/** Unicode: support everything */
private static final String TEXT_UNICODE = TEXT_LATIN1 + ", " + TEXT_LATIN7
+ ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP;
- private static void checkXmlContent(final String xml, final String encoding, final String defaultEncoding)
- throws IOException {
+ private static void checkXmlContent(final String xml, final String encodingName, final String defaultEncodingName)
+ throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
- final XmlStreamWriter writer = new XmlStreamWriter(out, defaultEncoding);
+ final XmlStreamWriter writer = new XmlStreamWriter(out, defaultEncodingName);
writer.write(xml);
writer.close();
final byte[] xmlContent = out.toByteArray();
- assertTrue(encoding.equalsIgnoreCase(writer.getEncoding()));
- assertArrayEquals(xml.getBytes(encoding), xmlContent);
-
+ final Charset charset = Charset.forName(encodingName);
+ final Charset writerCharset = Charset.forName(writer.getEncoding());
+ assertEquals(charset, writerCharset);
+ assertTrue(writerCharset.contains(charset), writerCharset.name());
+ assertArrayEquals(xml.getBytes(encodingName), xmlContent);
}
private static void checkXmlWriter(final String text, final String encoding)
@@ -62,7 +72,7 @@ public class XmlStreamWriterTest {
final String xml = createXmlContent(text, encoding);
String effectiveEncoding = encoding;
if (effectiveEncoding == null) {
- effectiveEncoding = defaultEncoding == null ? "UTF-8" : defaultEncoding;
+ effectiveEncoding = defaultEncoding == null ? StandardCharsets.UTF_8.name() : defaultEncoding;
}
checkXmlContent(xml, effectiveEncoding, defaultEncoding);
}
@@ -121,7 +131,7 @@ public class XmlStreamWriterTest {
checkXmlWriter(TEXT_LATIN7, "ISO-8859-7");
}
- // Turkish language has specific rules to convert dotted and dottless i character.
+ /** The Turkish language has specific rules for converting the dotted and dotless i characters. */
@Test
@DefaultLocale(language = "tr")
public void testLowerCaseEncodingWithTurkishLocale_IO_557() throws IOException {
@@ -132,8 +142,7 @@ public class XmlStreamWriterTest {
@Test
public void testNoXmlHeader() throws IOException {
- final String xml = "<text>text with no XML header</text>";
- checkXmlContent(xml, "UTF-8", null);
+ checkXmlContent("<text>text with no XML header</text>", "UTF-8", null);
}
@Test