You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2012/06/05 16:48:02 UTC
svn commit: r1346400 - in /commons/proper/io/trunk/src: changes/
main/java/org/apache/commons/io/input/ test/java/org/apache/commons/io/input/
test/java/org/apache/commons/io/input/compatibility/
Author: ggregory
Date: Tue Jun 5 14:48:01 2012
New Revision: 1346400
URL: http://svn.apache.org/viewvc?rev=1346400&view=rev
Log:
[IO-320] Add XmlStreamReader support for UTF-32.
[IO-331] BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
Modified:
commons/proper/io/trunk/src/changes/changes.xml
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java
Modified: commons/proper/io/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/changes/changes.xml (original)
+++ commons/proper/io/trunk/src/changes/changes.xml Tue Jun 5 14:48:01 2012
@@ -47,6 +47,12 @@ The <action> type attribute can be add,u
<body>
<!-- The release date is the date RC is cut -->
<release version="2.4" date="2012-TDB-TDB" description="">
+ <action issue="IO-320" dev="ggregory" type="add">
+ Add XmlStreamReader support for UTF-32.
+ </action>
+ <action issue="IO-331" dev="ggregory" type="add">
+ BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
+ </action>
<action issue="IO-332" dev="ggregory" type="fix" due-to="liangly">
Improve tailer's reading performance.
</action>
Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java (original)
+++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java Tue Jun 5 14:48:01 2012
@@ -19,54 +19,66 @@ package org.apache.commons.io.input;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.List;
import org.apache.commons.io.ByteOrderMark;
/**
- * This class is used to wrap a stream that includes an encoded
- * {@link ByteOrderMark} as its first bytes.
- *
- * This class detects these bytes and, if required, can automatically skip them
- * and return the subsequent byte as the first byte in the stream.
- *
+ * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes.
+ *
+ * This class detects these bytes and, if required, can automatically skip them and return the subsequent byte as the
+ * first byte in the stream.
+ *
* The {@link ByteOrderMark} implementation has the following pre-defined BOMs:
* <ul>
- * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
- * <li>UTF-16BE - {@link ByteOrderMark#UTF_16LE}</li>
- * <li>UTF-16LE - {@link ByteOrderMark#UTF_16BE}</li>
+ * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
+ * <li>UTF-16BE - {@link ByteOrderMark#UTF_16BE}</li>
+ * <li>UTF-16LE - {@link ByteOrderMark#UTF_16LE}</li>
+ * <li>UTF-32BE - {@link ByteOrderMark#UTF_32BE}</li>
+ * <li>UTF-32LE - {@link ByteOrderMark#UTF_32LE}</li>
* </ul>
- *
- *
+ *
+ *
* <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
+ *
* <pre>
- * BOMInputStream bomIn = new BOMInputStream(in);
- * if (bomIn.hasBOM()) {
- * // has a UTF-8 BOM
- * }
+ * BOMInputStream bomIn = new BOMInputStream(in);
+ * if (bomIn.hasBOM()) {
+ * // has a UTF-8 BOM
+ * }
* </pre>
- *
+ *
* <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
+ *
* <pre>
- * boolean include = true;
- * BOMInputStream bomIn = new BOMInputStream(in, include);
- * if (bomIn.hasBOM()) {
- * // has a UTF-8 BOM
- * }
+ * boolean include = true;
+ * BOMInputStream bomIn = new BOMInputStream(in, include);
+ * if (bomIn.hasBOM()) {
+ * // has a UTF-8 BOM
+ * }
* </pre>
- *
+ *
* <h3>Example 3 - Detect Multiple BOMs</h3>
+ *
* <pre>
- * BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
- * if (bomIn.hasBOM() == false) {
- * // No BOM found
- * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
- * // has a UTF-16LE BOM
- * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
- * // has a UTF-16BE BOM
- * }
+ * BOMInputStream bomIn = new BOMInputStream(in,
+ * ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
+ * ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
+ * );
+ * if (bomIn.hasBOM() == false) {
+ * // No BOM found
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
+ * // has a UTF-16LE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
+ * // has a UTF-16BE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) {
+ * // has a UTF-32LE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) {
+ * // has a UTF-32BE BOM
+ * }
* </pre>
- *
+ *
* @see org.apache.commons.io.ByteOrderMark
* @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
* @version $Id$
@@ -74,6 +86,9 @@ import org.apache.commons.io.ByteOrderMa
*/
public class BOMInputStream extends ProxyInputStream {
private final boolean include;
+ /**
+ * BOMs are sorted from longest to shortest.
+ */
private final List<ByteOrderMark> boms;
private ByteOrderMark byteOrderMark;
private int[] firstBytes;
@@ -83,42 +98,66 @@ public class BOMInputStream extends Prox
private boolean markedAtStart;
/**
- * Constructs a new BOM InputStream that excludes
- * a {@link ByteOrderMark#UTF_8} BOM.
- * @param delegate the InputStream to delegate to
+ * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM.
+ *
+ * @param delegate
+ * the InputStream to delegate to
*/
public BOMInputStream(InputStream delegate) {
this(delegate, false, ByteOrderMark.UTF_8);
}
/**
- * Constructs a new BOM InputStream that detects a
- * a {@link ByteOrderMark#UTF_8} and optionally includes it.
- * @param delegate the InputStream to delegate to
- * @param include true to include the UTF-8 BOM or
- * false to exclude it
+ * Constructs a new BOM InputStream that detects a {@link ByteOrderMark#UTF_8} and optionally includes it.
+ *
+ * @param delegate
+ * the InputStream to delegate to
+ * @param include
+ * true to include the UTF-8 BOM or false to exclude it
*/
public BOMInputStream(InputStream delegate, boolean include) {
this(delegate, include, ByteOrderMark.UTF_8);
}
/**
- * Constructs a new BOM InputStream that excludes
- * the specified BOMs.
- * @param delegate the InputStream to delegate to
- * @param boms The BOMs to detect and exclude
+ * Constructs a new BOM InputStream that excludes the specified BOMs.
+ *
+ * @param delegate
+ * the InputStream to delegate to
+ * @param boms
+ * The BOMs to detect and exclude
*/
public BOMInputStream(InputStream delegate, ByteOrderMark... boms) {
this(delegate, false, boms);
}
/**
- * Constructs a new BOM InputStream that detects the
- * specified BOMs and optionally includes them.
- * @param delegate the InputStream to delegate to
- * @param include true to include the specified BOMs or
- * false to exclude them
- * @param boms The BOMs to detect and optionally exclude
+ * Compares ByteOrderMark objects in descending length order.
+ */
+ private static final Comparator<ByteOrderMark> ByteOrderMarkLengthComparator = new Comparator<ByteOrderMark>() {
+
+ public int compare(ByteOrderMark bom1, ByteOrderMark bom2) {
+ int len1 = bom1.length();
+ int len2 = bom2.length();
+ if (len1 > len2) {
+ return -1;
+ }
+ if (len2 > len1) {
+ return 1;
+ }
+ return 0;
+ }
+ };
+
+ /**
+ * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them.
+ *
+ * @param delegate
+ * the InputStream to delegate to
+ * @param include
+ * true to include the specified BOMs or false to exclude them
+ * @param boms
+ * The BOMs to detect and optionally exclude
*/
public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
super(delegate);
@@ -126,15 +165,18 @@ public class BOMInputStream extends Prox
throw new IllegalArgumentException("No BOMs specified");
}
this.include = include;
+ // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes.
+ Arrays.sort(boms, ByteOrderMarkLengthComparator);
this.boms = Arrays.asList(boms);
+
}
/**
* Indicates whether the stream contains one of the specified BOMs.
- *
- * @return true if the stream has one of the specified BOMs, otherwise false
- * if it does not
- * @throws IOException if an error reading the first bytes of the stream occurs
+ *
+ * @return true if the stream has one of the specified BOMs, otherwise false if it does not
+ * @throws IOException
+ * if an error reading the first bytes of the stream occurs
*/
public boolean hasBOM() throws IOException {
return getBOM() != null;
@@ -142,13 +184,14 @@ public class BOMInputStream extends Prox
/**
* Indicates whether the stream contains the specified BOM.
- *
- * @param bom The BOM to check for
- * @return true if the stream has the specified BOM, otherwise false
- * if it does not
- * @throws IllegalArgumentException if the BOM is not one the stream
- * is configured to detect
- * @throws IOException if an error reading the first bytes of the stream occurs
+ *
+ * @param bom
+ * The BOM to check for
+ * @return true if the stream has the specified BOM, otherwise false if it does not
+ * @throws IllegalArgumentException
+ * if the BOM is not one the stream is configured to detect
+ * @throws IOException
+ * if an error reading the first bytes of the stream occurs
*/
public boolean hasBOM(ByteOrderMark bom) throws IOException {
if (!boms.contains(bom)) {
@@ -159,31 +202,34 @@ public class BOMInputStream extends Prox
/**
* Return the BOM (Byte Order Mark).
- *
+ *
* @return The BOM or null if none
- * @throws IOException if an error reading the first bytes of the stream occurs
+ * @throws IOException
+ * if an error reading the first bytes of the stream occurs
*/
public ByteOrderMark getBOM() throws IOException {
if (firstBytes == null) {
fbLength = 0;
- int max = 0;
- for (ByteOrderMark bom : boms) {
- max = Math.max(max, bom.length());
- }
- firstBytes = new int[max];
+ // BOMs are sorted from longest to shortest
+ final int maxBomSize = boms.get(0).length();
+ firstBytes = new int[maxBomSize];
+ // Read first maxBomSize bytes
for (int i = 0; i < firstBytes.length; i++) {
firstBytes[i] = in.read();
fbLength++;
if (firstBytes[i] < 0) {
break;
}
-
- byteOrderMark = find();
- if (byteOrderMark != null) {
- if (!include) {
+ }
+ // match BOM in firstBytes
+ byteOrderMark = find();
+ if (byteOrderMark != null) {
+ if (!include) {
+ if (byteOrderMark.length() < firstBytes.length) {
+ fbIndex = byteOrderMark.length();
+ } else {
fbLength = 0;
}
- break;
}
}
}
@@ -192,9 +238,10 @@ public class BOMInputStream extends Prox
/**
* Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}.
- *
+ *
* @return The BOM charset Name or null if no BOM found
- * @throws IOException if an error reading the first bytes of the stream occurs
+ * @throws IOException
+ * if an error reading the first bytes of the stream occurs
*
*/
public String getBOMCharsetName() throws IOException {
@@ -203,12 +250,13 @@ public class BOMInputStream extends Prox
}
/**
- * This method reads and either preserves or skips the first bytes in the
- * stream. It behaves like the single-byte <code>read()</code> method,
- * either returning a valid byte or -1 to indicate that the initial bytes
- * have been processed already.
+ * This method reads and either preserves or skips the first bytes in the stream. It behaves like the single-byte
+ * <code>read()</code> method, either returning a valid byte or -1 to indicate that the initial bytes have been
+ * processed already.
+ *
* @return the byte read (excluding BOM) or -1 if the end of stream
- * @throws IOException if an I/O error occurs
+ * @throws IOException
+ * if an I/O error occurs
*/
private int readFirstBytes() throws IOException {
getBOM();
@@ -217,7 +265,7 @@ public class BOMInputStream extends Prox
/**
* Find a BOM with the specified bytes.
- *
+ *
* @return The matched BOM or null if none matched
*/
private ByteOrderMark find() {
@@ -231,14 +279,16 @@ public class BOMInputStream extends Prox
/**
* Check if the bytes match a BOM.
- *
- * @param bom The BOM
+ *
+ * @param bom
+ * The BOM
* @return true if the bytes match the bom, otherwise false
*/
private boolean matches(ByteOrderMark bom) {
- if (bom.length() != fbLength) {
- return false;
- }
+ // if (bom.length() != fbLength) {
+ // return false;
+ // }
+ // firstBytes may be bigger than the BOM bytes
for (int i = 0; i < bom.length(); i++) {
if (bom.get(i) != firstBytes[i]) {
return false;
@@ -247,15 +297,16 @@ public class BOMInputStream extends Prox
return true;
}
- //----------------------------------------------------------------------------
- // Implementation of InputStream
- //----------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------
+ // Implementation of InputStream
+ // ----------------------------------------------------------------------------
/**
- * Invokes the delegate's <code>read()</code> method, detecting and
- * optionally skipping BOM.
+ * Invokes the delegate's <code>read()</code> method, detecting and optionally skipping BOM.
+ *
* @return the byte read (excluding BOM) or -1 if the end of stream
- * @throws IOException if an I/O error occurs
+ * @throws IOException
+ * if an I/O error occurs
*/
@Override
public int read() throws IOException {
@@ -264,13 +315,17 @@ public class BOMInputStream extends Prox
}
/**
- * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting
- * and optionally skipping BOM.
- * @param buf the buffer to read the bytes into
- * @param off The start offset
- * @param len The number of bytes to read (excluding BOM)
+ * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting and optionally skipping BOM.
+ *
+ * @param buf
+ * the buffer to read the bytes into
+ * @param off
+ * The start offset
+ * @param len
+ * The number of bytes to read (excluding BOM)
* @return the number of bytes read or -1 if the end of stream
- * @throws IOException if an I/O error occurs
+ * @throws IOException
+ * if an I/O error occurs
*/
@Override
public int read(byte[] buf, int off, int len) throws IOException {
@@ -289,12 +344,13 @@ public class BOMInputStream extends Prox
}
/**
- * Invokes the delegate's <code>read(byte[])</code> method, detecting and
- * optionally skipping BOM.
- * @param buf the buffer to read the bytes into
- * @return the number of bytes read (excluding BOM)
- * or -1 if the end of stream
- * @throws IOException if an I/O error occurs
+ * Invokes the delegate's <code>read(byte[])</code> method, detecting and optionally skipping BOM.
+ *
+ * @param buf
+ * the buffer to read the bytes into
+ * @return the number of bytes read (excluding BOM) or -1 if the end of stream
+ * @throws IOException
+ * if an I/O error occurs
*/
@Override
public int read(byte[] buf) throws IOException {
@@ -303,7 +359,9 @@ public class BOMInputStream extends Prox
/**
* Invokes the delegate's <code>mark(int)</code> method.
- * @param readlimit read ahead limit
+ *
+ * @param readlimit
+ * read ahead limit
*/
@Override
public synchronized void mark(int readlimit) {
@@ -314,7 +372,9 @@ public class BOMInputStream extends Prox
/**
* Invokes the delegate's <code>reset()</code> method.
- * @throws IOException if an I/O error occurs
+ *
+ * @throws IOException
+ * if an I/O error occurs
*/
@Override
public synchronized void reset() throws IOException {
@@ -327,11 +387,13 @@ public class BOMInputStream extends Prox
}
/**
- * Invokes the delegate's <code>skip(long)</code> method, detecting
- * and optionallyskipping BOM.
- * @param n the number of bytes to skip
+ * Invokes the delegate's <code>skip(long)</code> method, detecting and optionally skipping BOM.
+ *
+ * @param n
+ * the number of bytes to skip
* @return the number of bytes to skipped or -1 if the end of stream
- * @throws IOException if an I/O error occurs
+ * @throws IOException
+ * if an I/O error occurs
*/
@Override
public long skip(long n) throws IOException {
Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java (original)
+++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java Tue Jun 5 14:48:01 2012
@@ -74,23 +74,36 @@ public class XmlStreamReader extends Rea
private static final String UTF_16LE = "UTF-16LE";
+ private static final String UTF_32BE = "UTF-32BE";
+
+ private static final String UTF_32LE = "UTF-32LE";
+
private static final String UTF_16 = "UTF-16";
+ private static final String UTF_32 = "UTF-32";
+
private static final String EBCDIC = "CP1047";
private static final ByteOrderMark[] BOMS = new ByteOrderMark[] {
ByteOrderMark.UTF_8,
ByteOrderMark.UTF_16BE,
- ByteOrderMark.UTF_16LE
+ ByteOrderMark.UTF_16LE,
+ ByteOrderMark.UTF_32BE,
+ ByteOrderMark.UTF_32LE
};
+
+ // UTF_16LE and UTF_32LE have the same two starting BOM bytes.
private static final ByteOrderMark[] XML_GUESS_BYTES = new ByteOrderMark[] {
new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D),
new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F),
new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00),
+ new ByteOrderMark(UTF_32BE, 0x00, 0x00, 0x00, 0x3C,
+ 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D),
+ new ByteOrderMark(UTF_32LE, 0x3C, 0x00, 0x00, 0x00,
+ 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00),
new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94)
};
-
private final Reader reader;
private final String encoding;
@@ -532,6 +545,19 @@ public class XmlStreamReader extends Rea
return bomEnc;
}
+ // BOM is UTF-32BE or UTF-32LE
+ if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) {
+ if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) {
+ String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
+ throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
+ }
+ if (xmlEnc != null && !xmlEnc.equals(UTF_32) && !xmlEnc.equals(bomEnc)) {
+ String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
+ throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
+ }
+ return bomEnc;
+ }
+
// BOM is something else
String msg = MessageFormat.format(RAW_EX_2, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
@@ -598,6 +624,24 @@ public class XmlStreamReader extends Rea
throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
}
+ // UTF-32BE or UTF-32LE content type encoding
+ if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
+ if (bomEnc != null) {
+ String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+ throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+ }
+ return cTEnc;
+ }
+
+ // UTF-32 content type encoding
+ if (cTEnc.equals(UTF_32)) {
+ if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
+ return bomEnc;
+ }
+ String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+ throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+ }
+
return cTEnc;
}
Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java Tue Jun 5 14:48:01 2012
@@ -31,7 +31,6 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.IOUtils;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -96,13 +95,11 @@ public class XmlStreamReaderTest {
}
@Test
- @Ignore
public void testRawNoBomUtf32BE() throws Exception {
_testRawNoBomValid("UTF-32BE");
}
@Test
- @Ignore
public void testRawNoBomUtf32LE() throws Exception {
_testRawNoBomValid("UTF-32LE");
}
@@ -121,7 +118,7 @@ public class XmlStreamReaderTest {
InputStream is = getXmlStream(encoding + "-bom", XML3, encoding,
encoding);
XmlStreamReader xmlReader = new XmlStreamReader(is, false);
- if (!encoding.equals("UTF-16")) {
+ if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) {
assertEquals(xmlReader.getEncoding(), encoding);
} else {
assertEquals(xmlReader.getEncoding()
@@ -135,7 +132,7 @@ public class XmlStreamReaderTest {
try {
XmlStreamReader xmlReader = new XmlStreamReader(is, false);
String foundEnc = xmlReader.getEncoding();
- fail("It should have failed for BOM " + bomEnc + ", streamEnc "
+ fail("Expected IOException for BOM " + bomEnc + ", streamEnc "
+ streamEnc + " and prologEnc " + prologEnc + ": found "
+ foundEnc);
} catch (IOException ex) {
@@ -154,6 +151,9 @@ public class XmlStreamReaderTest {
_testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
_testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
_testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
+ _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
+ _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
+ _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
}
@Test
@@ -168,114 +168,105 @@ public class XmlStreamReaderTest {
}
@Test
- @Ignore
public void testRawBomUtf32() throws Exception {
_testRawBomValid("UTF-32BE");
_testRawBomValid("UTF-32LE");
_testRawBomValid("UTF-32");
- }
+
+ _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
+ _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
+ _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
+}
@Test
public void testHttp() throws Exception {
// niallp 2010-10-06 - remove following 2 tests - I reinstated
- // checks for non-UTF-16 encodings (18 tests) and these failed
- //_testHttpValid("application/xml", "no-bom", "US-ASCII", null);
- //_testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
+ // checks for non-UTF-16 encodings (18 tests) and these failed
+ // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
+ // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
_testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
_testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
- _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
- null);
- _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom",
- "UTF-8", null);
- _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8",
- null);
- _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
- "UTF-8");
- _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
- "UTF-16BE", null);
- _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16");
- _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16BE");
-
- _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", null);
- _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16");
- _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16BE");
+ _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
+ _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null);
+ _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null);
+ _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
+ _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
+ _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
+ _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
+
+ _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
+ _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
+ _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
+
+ _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
+ _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
+ _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
+
_testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
- _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8",
- "UTF-8");
- _testHttpInvalid("application/xml;charset=UTF-16", "no-bom",
- "UTF-16BE", "UTF-16BE");
+ _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
+ _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
+ _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8");
+ _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
_testHttpValid("text/xml", "no-bom", "US-ASCII", null);
_testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
_testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
- _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
- null);
- _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
- "UTF-16");
- _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
- "UTF-16BE");
+ _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
+ _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
+ _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
+ _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null);
+ _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
+ _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
_testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
- _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8",
- null, null);
- _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII",
- null, "US-ASCII");
- _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8",
- null, "UTF-8");
- _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
- null);
- _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
- "US-ASCII");
- _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
- "UTF-8");
-
- _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", null);
- _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16");
- _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16BE");
- _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE",
- "UTF-16BE");
+ _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
+ _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
+ _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
+ _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
+ _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
+ _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
+
+ _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
+ _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
+ _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
+ _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
_testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);
+ _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
+ _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
+ _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
+ _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
+ _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null);
+
_testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
- _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
- "UTF-8", "UTF-8");
- _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null,
- "UTF-8");
- _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
- null, "UTF-16BE");
- _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
- "UTF-16", "UTF-16");
- _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
- "UTF-16BE", "UTF-16BE");
+ _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
+ _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
+ _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
+ _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
+ _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
+ _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
+ _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
+ _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
_testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");
- _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", null, "UTF-16BE");
- _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16", "UTF-16");
- _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
- "UTF-16BE", "UTF-16BE", "UTF-16BE");
- _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE",
- "UTF-16BE", "UTF-16BE");
- _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null,
- "UTF-16");
+ _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
+ _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
+ _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
+ _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
+ _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");
+
+ _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
+ _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
+ _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
+ _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
+ _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32");
- _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII",
- "US-ASCII");
+ _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
_testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
- _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII",
- "UTF-8", "UTF-8");
- _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII",
- "UTF-8", "UTF-8");
+ _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
+ _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
+ _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
}
@Test
Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java Tue Jun 5 14:48:01 2012
@@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
import java.io.IOException;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -156,12 +155,13 @@ public class XmlStreamReaderUtilitiesTes
/** BOM calculateRawEncoding() Test */
@Test
- @Ignore
+ //@Ignore
public void testCalculateRawEncodingStandardUtf32() throws IOException {
// Standard BOM Checks BOM Other Default
+ testCalculateRawEncodingStandard("UTF-8", "UTF-32BE", "UTF-32LE");
testCalculateRawEncodingStandard("UTF-32BE", "UTF-8", "UTF-32LE");
testCalculateRawEncodingStandard("UTF-32LE", "UTF-8", "UTF-32BE");
- }
+}
private void testCalculateRawEncodingStandard(String bomEnc, String otherEnc, String defaultEnc) throws IOException {
// Expected BOM Guess XMLEnc Default
@@ -178,7 +178,7 @@ public class XmlStreamReaderUtilitiesTes
/** Additional UTF-16 calculateRawEncoding() Test */
@Test
- public void testCalculateRawEncodingAdditonalkUTF16() throws IOException {
+ public void testCalculateRawEncodingAdditonalUTF16() throws IOException {
// BOM Guess XML Default
checkRawError(RAWMGS1, "UTF-16BE", "UTF-16", null, null);
checkRawEncoding("UTF-16BE", "UTF-16BE", null, "UTF-16", null);
@@ -192,6 +192,22 @@ public class XmlStreamReaderUtilitiesTes
checkRawError(RAWMGS1, "UTF-16LE", "UTF-16LE", "UTF-16BE", null);
}
+ /** Additional UTF-32 calculateRawEncoding() Test */
+ @Test
+ public void testCalculateRawEncodingAdditonalUTF32() throws IOException {
+ // BOM Guess XML Default
+ checkRawError(RAWMGS1, "UTF-32BE", "UTF-32", null, null);
+ checkRawEncoding("UTF-32BE", "UTF-32BE", null, "UTF-32", null);
+ checkRawEncoding("UTF-32BE", "UTF-32BE", "UTF-32BE", "UTF-32", null);
+ checkRawError(RAWMGS1, "UTF-32BE", null, "UTF-32LE", null);
+ checkRawError(RAWMGS1, "UTF-32BE", "UTF-32BE", "UTF-32LE", null);
+ checkRawError(RAWMGS1, "UTF-32LE", "UTF-32", null, null);
+ checkRawEncoding("UTF-32LE", "UTF-32LE", null, "UTF-32", null);
+ checkRawEncoding("UTF-32LE", "UTF-32LE", "UTF-32LE", "UTF-32", null);
+ checkRawError(RAWMGS1, "UTF-32LE", null, "UTF-32BE", null);
+ checkRawError(RAWMGS1, "UTF-32LE", "UTF-32LE", "UTF-32BE", null);
+ }
+
private void checkRawEncoding(String expected,
String bomEnc, String xmlGuessEnc, String xmlEnc, String defaultEncoding) throws IOException {
StringBuilder builder = new StringBuilder();
@@ -207,8 +223,7 @@ public class XmlStreamReaderUtilitiesTes
protected String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc,
String defaultEncoding) throws IOException {
MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
- String encoding = mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
- return encoding;
+ return mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
}
private void checkRawError(String msgSuffix,
@@ -257,7 +272,7 @@ public class XmlStreamReaderUtilitiesTes
/** Test calculate HTTP Encoding */
@Test
- @Ignore
+ //@Ignore
public void testCalculateHttpEncodingUtf32() throws IOException {
// No BOM Expected Lenient cType BOM Guess XML Default
checkHttpEncoding("UTF-32LE", true, null, null, null, "UTF-32LE", null);
@@ -277,7 +292,7 @@ public class XmlStreamReaderUtilitiesTes
private void checkHttpEncoding(String expected, boolean lenient, String httpContentType,
String bomEnc, String xmlGuessEnc, String xmlEnc, String defaultEncoding) throws IOException {
StringBuilder builder = new StringBuilder();
- builder.append("HttpEncoding: ").append(bomEnc).append("], ");
+ builder.append("HttpEncoding=[").append(bomEnc).append("], ");
builder.append("lenient=[").append(lenient).append("], ");
builder.append("httpContentType=[").append(httpContentType).append("], ");
builder.append("bomEnc=[").append(bomEnc).append("], ");
@@ -291,8 +306,7 @@ public class XmlStreamReaderUtilitiesTes
protected String calculateHttpEncoding(String httpContentType, String bomEnc, String xmlGuessEnc,
String xmlEnc, boolean lenient, String defaultEncoding) throws IOException {
MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
- String encoding = mock.calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
- return encoding;
+ return mock.calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
}
private void checkHttpError(String msgSuffix, boolean lenient, String httpContentType,
Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java Tue Jun 5 14:48:01 2012
@@ -74,6 +74,12 @@ public class XmlStreamReader extends Rea
private static final String UTF_16 = "UTF-16";
+ private static final String UTF_32BE = "UTF-32BE";
+
+ private static final String UTF_32LE = "UTF-32LE";
+
+ private static final String UTF_32 = "UTF-32";
+
private static final String EBCDIC = "CP1047";
private static String staticDefaultEncoding = null;
@@ -447,6 +453,10 @@ public class XmlStreamReader extends Rea
&& (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc
.equals(UTF_16LE))) {
encoding = xmlGuessEnc;
+ } else if (xmlEnc.equals(UTF_32)
+ && (xmlGuessEnc.equals(UTF_32BE) || xmlGuessEnc
+ .equals(UTF_32LE))) {
+ encoding = xmlGuessEnc;
} else {
encoding = xmlEnc;
}
@@ -474,6 +484,18 @@ public class XmlStreamReader extends Rea
bomEnc, xmlGuessEnc, xmlEnc, is);
}
encoding = bomEnc;
+ } else if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) {
+ if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) {
+ throw new XmlStreamReaderException(RAW_EX_1.format(new Object[] { bomEnc,
+ xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc, xmlEnc, is);
+ }
+ if (xmlEnc != null && !xmlEnc.equals(UTF_32)
+ && !xmlEnc.equals(bomEnc)) {
+ throw new XmlStreamReaderException(RAW_EX_1
+ .format(new Object[] { bomEnc, xmlGuessEnc, xmlEnc }),
+ bomEnc, xmlGuessEnc, xmlEnc, is);
+ }
+ encoding = bomEnc;
} else {
throw new XmlStreamReaderException(RAW_EX_2.format(new Object[] {
bomEnc, xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc,
@@ -516,6 +538,21 @@ public class XmlStreamReader extends Rea
xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
bomEnc, xmlGuessEnc, xmlEnc, is);
}
+ } else if (bomEnc != null
+ && (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE))) {
+ throw new XmlStreamReaderException(HTTP_EX_1
+ .format(new Object[] { cTMime, cTEnc, bomEnc,
+ xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
+ bomEnc, xmlGuessEnc, xmlEnc, is);
+ } else if (cTEnc.equals(UTF_32)) {
+ if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
+ encoding = bomEnc;
+ } else {
+ throw new XmlStreamReaderException(HTTP_EX_2
+ .format(new Object[] { cTMime, cTEnc, bomEnc,
+ xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
+ bomEnc, xmlGuessEnc, xmlEnc, is);
+ }
} else {
encoding = cTEnc;
}
Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java Tue Jun 5 14:48:01 2012
@@ -36,11 +36,10 @@ public class XmlStreamReaderUtilitiesCom
protected String calculateHttpEncoding(String httpContentType, String bomEnc, String xmlGuessEnc,
String xmlEnc, boolean lenient, String defaultEncoding) throws IOException {
MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
- String encoding = mock.calculateHttpEncoding(
+ return mock.calculateHttpEncoding(
XmlStreamReader.getContentTypeMime(httpContentType),
XmlStreamReader.getContentTypeEncoding(httpContentType),
bomEnc, xmlGuessEnc, xmlEnc, null, lenient);
- return encoding;
}
/** Mock {@link XmlStreamReader} implementation */
Re: svn commit: r1346400 - in /commons/proper/io/trunk/src: changes/
main/java/org/apache/commons/io/input/ test/java/org/apache/commons/io/input/ test/java/org/apache/commons/io/input/compatibility/
Posted by Gary Gregory <ga...@gmail.com>.
On Jun 5, 2012, at 20:20, sebb <se...@gmail.com> wrote:
> On 5 June 2012 15:48, <gg...@apache.org> wrote:
>> Author: ggregory
>> Date: Tue Jun 5 14:48:01 2012
>> New Revision: 1346400
>>
>> URL: http://svn.apache.org/viewvc?rev=1346400&view=rev
>> Log:
>> [IO-320] Add XmlStreamReader support for UTF-32.
>> [IO-331] BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
>
> Please try to keep commits to a single fix.
This *is* one fix. One JIRA is a different, lower-level expression of the other.
Gary
>
>>
>> Modified:
>> commons/proper/io/trunk/src/changes/changes.xml
>> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java
>> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
>> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
>> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java
>> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
>> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java
>>
>> Modified: commons/proper/io/trunk/src/changes/changes.xml
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/changes/changes.xml (original)
>> +++ commons/proper/io/trunk/src/changes/changes.xml Tue Jun 5 14:48:01 2012
>> @@ -47,6 +47,12 @@ The <action> type attribute can be add,u
>> <body>
>> <!-- The release date is the date RC is cut -->
>> <release version="2.4" date="2012-TDB-TDB" description="">
>> + <action issue="IO-320" dev="ggregory" type="add">
>> + Add XmlStreamReader support for UTF-32.
>> + </action>
>> + <action issue="IO-331" dev="ggregory" type="add">
>> + BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
>> + </action>
>> <action issue="IO-332" dev="ggregory" type="fix" due-to="liangly">
>> Improve tailer's reading performance.
>> </action>
>>
>> Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java (original)
>> +++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java Tue Jun 5 14:48:01 2012
>> @@ -19,54 +19,66 @@ package org.apache.commons.io.input;
>> import java.io.IOException;
>> import java.io.InputStream;
>> import java.util.Arrays;
>> +import java.util.Comparator;
>> import java.util.List;
>>
>> import org.apache.commons.io.ByteOrderMark;
>>
>> /**
>> - * This class is used to wrap a stream that includes an encoded
>> - * {@link ByteOrderMark} as its first bytes.
>> - *
>> - * This class detects these bytes and, if required, can automatically skip them
>> - * and return the subsequent byte as the first byte in the stream.
>> - *
>> + * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes.
>> + *
>> + * This class detects these bytes and, if required, can automatically skip them and return the subsequent byte as the
>> + * first byte in the stream.
>> + *
>> * The {@link ByteOrderMark} implementation has the following pre-defined BOMs:
>> * <ul>
>> - * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
>> - * <li>UTF-16BE - {@link ByteOrderMark#UTF_16LE}</li>
>> - * <li>UTF-16LE - {@link ByteOrderMark#UTF_16BE}</li>
>> + * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
>> + * <li>UTF-16BE - {@link ByteOrderMark#UTF_16BE}</li>
>> + * <li>UTF-16LE - {@link ByteOrderMark#UTF_16LE}</li>
>> + * <li>UTF-32BE - {@link ByteOrderMark#UTF_32BE}</li>
>> + * <li>UTF-32LE - {@link ByteOrderMark#UTF_32LE}</li>
>> * </ul>
>> - *
>> - *
>> + *
>> + *
>> * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
>> + *
>> * <pre>
>> - * BOMInputStream bomIn = new BOMInputStream(in);
>> - * if (bomIn.hasBOM()) {
>> - * // has a UTF-8 BOM
>> - * }
>> + * BOMInputStream bomIn = new BOMInputStream(in);
>> + * if (bomIn.hasBOM()) {
>> + * // has a UTF-8 BOM
>> + * }
>> * </pre>
>> - *
>> + *
>> * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
>> + *
>> * <pre>
>> - * boolean include = true;
>> - * BOMInputStream bomIn = new BOMInputStream(in, include);
>> - * if (bomIn.hasBOM()) {
>> - * // has a UTF-8 BOM
>> - * }
>> + * boolean include = true;
>> + * BOMInputStream bomIn = new BOMInputStream(in, include);
>> + * if (bomIn.hasBOM()) {
>> + * // has a UTF-8 BOM
>> + * }
>> * </pre>
>> - *
>> + *
>> * <h3>Example 3 - Detect Multiple BOMs</h3>
>> + *
>> * <pre>
>> - * BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
>> - * if (bomIn.hasBOM() == false) {
>> - * // No BOM found
>> - * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
>> - * // has a UTF-16LE BOM
>> - * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
>> - * // has a UTF-16BE BOM
>> - * }
>> + * BOMInputStream bomIn = new BOMInputStream(in,
>> + * ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
>> + * ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
>> + * );
>> + * if (bomIn.hasBOM() == false) {
>> + * // No BOM found
>> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
>> + * // has a UTF-16LE BOM
>> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
>> + * // has a UTF-16BE BOM
>> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) {
>> + * // has a UTF-32LE BOM
>> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) {
>> + * // has a UTF-32BE BOM
>> + * }
>> * </pre>
>> - *
>> + *
>> * @see org.apache.commons.io.ByteOrderMark
>> * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
>> * @version $Id$
>> @@ -74,6 +86,9 @@ import org.apache.commons.io.ByteOrderMa
>> */
>> public class BOMInputStream extends ProxyInputStream {
>> private final boolean include;
>> + /**
>> + * BOMs are sorted from longest to shortest.
>> + */
>> private final List<ByteOrderMark> boms;
>> private ByteOrderMark byteOrderMark;
>> private int[] firstBytes;
>> @@ -83,42 +98,66 @@ public class BOMInputStream extends Prox
>> private boolean markedAtStart;
>>
>> /**
>> - * Constructs a new BOM InputStream that excludes
>> - * a {@link ByteOrderMark#UTF_8} BOM.
>> - * @param delegate the InputStream to delegate to
>> + * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM.
>> + *
>> + * @param delegate
>> + * the InputStream to delegate to
>> */
>> public BOMInputStream(InputStream delegate) {
>> this(delegate, false, ByteOrderMark.UTF_8);
>> }
>>
>> /**
>> - * Constructs a new BOM InputStream that detects a
>> - * a {@link ByteOrderMark#UTF_8} and optionally includes it.
>> - * @param delegate the InputStream to delegate to
>> - * @param include true to include the UTF-8 BOM or
>> - * false to exclude it
>> + * Constructs a new BOM InputStream that detects a {@link ByteOrderMark#UTF_8} and optionally includes it.
>> + *
>> + * @param delegate
>> + * the InputStream to delegate to
>> + * @param include
>> + * true to include the UTF-8 BOM or false to exclude it
>> */
>> public BOMInputStream(InputStream delegate, boolean include) {
>> this(delegate, include, ByteOrderMark.UTF_8);
>> }
>>
>> /**
>> - * Constructs a new BOM InputStream that excludes
>> - * the specified BOMs.
>> - * @param delegate the InputStream to delegate to
>> - * @param boms The BOMs to detect and exclude
>> + * Constructs a new BOM InputStream that excludes the specified BOMs.
>> + *
>> + * @param delegate
>> + * the InputStream to delegate to
>> + * @param boms
>> + * The BOMs to detect and exclude
>> */
>> public BOMInputStream(InputStream delegate, ByteOrderMark... boms) {
>> this(delegate, false, boms);
>> }
>>
>> /**
>> - * Constructs a new BOM InputStream that detects the
>> - * specified BOMs and optionally includes them.
>> - * @param delegate the InputStream to delegate to
>> - * @param include true to include the specified BOMs or
>> - * false to exclude them
>> - * @param boms The BOMs to detect and optionally exclude
>> + * Compares ByteOrderMark objects in descending length order.
>> + */
>> + private static final Comparator<ByteOrderMark> ByteOrderMarkLengthComparator = new Comparator<ByteOrderMark>() {
>> +
>> + public int compare(ByteOrderMark bom1, ByteOrderMark bom2) {
>> + int len1 = bom1.length();
>> + int len2 = bom2.length();
>> + if (len1 > len2) {
>> + return -1;
>> + }
>> + if (len2 > len1) {
>> + return 1;
>> + }
>> + return 0;
>> + }
>> + };
>> +
>> + /**
>> + * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them.
>> + *
>> + * @param delegate
>> + * the InputStream to delegate to
>> + * @param include
>> + * true to include the specified BOMs or false to exclude them
>> + * @param boms
>> + * The BOMs to detect and optionally exclude
>> */
>> public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
>> super(delegate);
>> @@ -126,15 +165,18 @@ public class BOMInputStream extends Prox
>> throw new IllegalArgumentException("No BOMs specified");
>> }
>> this.include = include;
>> + // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes.
>> + Arrays.sort(boms, ByteOrderMarkLengthComparator);
>> this.boms = Arrays.asList(boms);
>> +
>> }
>>
>> /**
>> * Indicates whether the stream contains one of the specified BOMs.
>> - *
>> - * @return true if the stream has one of the specified BOMs, otherwise false
>> - * if it does not
>> - * @throws IOException if an error reading the first bytes of the stream occurs
>> + *
>> + * @return true if the stream has one of the specified BOMs, otherwise false if it does not
>> + * @throws IOException
>> + * if an error reading the first bytes of the stream occurs
>> */
>> public boolean hasBOM() throws IOException {
>> return getBOM() != null;
>> @@ -142,13 +184,14 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Indicates whether the stream contains the specified BOM.
>> - *
>> - * @param bom The BOM to check for
>> - * @return true if the stream has the specified BOM, otherwise false
>> - * if it does not
>> - * @throws IllegalArgumentException if the BOM is not one the stream
>> - * is configured to detect
>> - * @throws IOException if an error reading the first bytes of the stream occurs
>> + *
>> + * @param bom
>> + * The BOM to check for
>> + * @return true if the stream has the specified BOM, otherwise false if it does not
>> + * @throws IllegalArgumentException
>> + * if the BOM is not one the stream is configured to detect
>> + * @throws IOException
>> + * if an error reading the first bytes of the stream occurs
>> */
>> public boolean hasBOM(ByteOrderMark bom) throws IOException {
>> if (!boms.contains(bom)) {
>> @@ -159,31 +202,34 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Return the BOM (Byte Order Mark).
>> - *
>> + *
>> * @return The BOM or null if none
>> - * @throws IOException if an error reading the first bytes of the stream occurs
>> + * @throws IOException
>> + * if an error reading the first bytes of the stream occurs
>> */
>> public ByteOrderMark getBOM() throws IOException {
>> if (firstBytes == null) {
>> fbLength = 0;
>> - int max = 0;
>> - for (ByteOrderMark bom : boms) {
>> - max = Math.max(max, bom.length());
>> - }
>> - firstBytes = new int[max];
>> + // BOMs are sorted from longest to shortest
>> + final int maxBomSize = boms.get(0).length();
>> + firstBytes = new int[maxBomSize];
>> + // Read first maxBomSize bytes
>> for (int i = 0; i < firstBytes.length; i++) {
>> firstBytes[i] = in.read();
>> fbLength++;
>> if (firstBytes[i] < 0) {
>> break;
>> }
>> -
>> - byteOrderMark = find();
>> - if (byteOrderMark != null) {
>> - if (!include) {
>> + }
>> + // match BOM in firstBytes
>> + byteOrderMark = find();
>> + if (byteOrderMark != null) {
>> + if (!include) {
>> + if (byteOrderMark.length() < firstBytes.length) {
>> + fbIndex = byteOrderMark.length();
>> + } else {
>> fbLength = 0;
>> }
>> - break;
>> }
>> }
>> }
>> @@ -192,9 +238,10 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}.
>> - *
>> + *
>> * @return The BOM charset Name or null if no BOM found
>> - * @throws IOException if an error reading the first bytes of the stream occurs
>> + * @throws IOException
>> + * if an error reading the first bytes of the stream occurs
>> *
>> */
>> public String getBOMCharsetName() throws IOException {
>> @@ -203,12 +250,13 @@ public class BOMInputStream extends Prox
>> }
>>
>> /**
>> - * This method reads and either preserves or skips the first bytes in the
>> - * stream. It behaves like the single-byte <code>read()</code> method,
>> - * either returning a valid byte or -1 to indicate that the initial bytes
>> - * have been processed already.
>> + * This method reads and either preserves or skips the first bytes in the stream. It behaves like the single-byte
>> + * <code>read()</code> method, either returning a valid byte or -1 to indicate that the initial bytes have been
>> + * processed already.
>> + *
>> * @return the byte read (excluding BOM) or -1 if the end of stream
>> - * @throws IOException if an I/O error occurs
>> + * @throws IOException
>> + * if an I/O error occurs
>> */
>> private int readFirstBytes() throws IOException {
>> getBOM();
>> @@ -217,7 +265,7 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Find a BOM with the specified bytes.
>> - *
>> + *
>> * @return The matched BOM or null if none matched
>> */
>> private ByteOrderMark find() {
>> @@ -231,14 +279,16 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Check if the bytes match a BOM.
>> - *
>> - * @param bom The BOM
>> + *
>> + * @param bom
>> + * The BOM
>> * @return true if the bytes match the bom, otherwise false
>> */
>> private boolean matches(ByteOrderMark bom) {
>> - if (bom.length() != fbLength) {
>> - return false;
>> - }
>> + // if (bom.length() != fbLength) {
>> + // return false;
>> + // }
>> + // firstBytes may be bigger than the BOM bytes
>> for (int i = 0; i < bom.length(); i++) {
>> if (bom.get(i) != firstBytes[i]) {
>> return false;
>> @@ -247,15 +297,16 @@ public class BOMInputStream extends Prox
>> return true;
>> }
>>
>> - //----------------------------------------------------------------------------
>> - // Implementation of InputStream
>> - //----------------------------------------------------------------------------
>> + // ----------------------------------------------------------------------------
>> + // Implementation of InputStream
>> + // ----------------------------------------------------------------------------
>>
>> /**
>> - * Invokes the delegate's <code>read()</code> method, detecting and
>> - * optionally skipping BOM.
>> + * Invokes the delegate's <code>read()</code> method, detecting and optionally skipping BOM.
>> + *
>> * @return the byte read (excluding BOM) or -1 if the end of stream
>> - * @throws IOException if an I/O error occurs
>> + * @throws IOException
>> + * if an I/O error occurs
>> */
>> @Override
>> public int read() throws IOException {
>> @@ -264,13 +315,17 @@ public class BOMInputStream extends Prox
>> }
>>
>> /**
>> - * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting
>> - * and optionally skipping BOM.
>> - * @param buf the buffer to read the bytes into
>> - * @param off The start offset
>> - * @param len The number of bytes to read (excluding BOM)
>> + * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting and optionally skipping BOM.
>> + *
>> + * @param buf
>> + * the buffer to read the bytes into
>> + * @param off
>> + * The start offset
>> + * @param len
>> + * The number of bytes to read (excluding BOM)
>> * @return the number of bytes read or -1 if the end of stream
>> - * @throws IOException if an I/O error occurs
>> + * @throws IOException
>> + * if an I/O error occurs
>> */
>> @Override
>> public int read(byte[] buf, int off, int len) throws IOException {
>> @@ -289,12 +344,13 @@ public class BOMInputStream extends Prox
>> }
>>
>> /**
>> - * Invokes the delegate's <code>read(byte[])</code> method, detecting and
>> - * optionally skipping BOM.
>> - * @param buf the buffer to read the bytes into
>> - * @return the number of bytes read (excluding BOM)
>> - * or -1 if the end of stream
>> - * @throws IOException if an I/O error occurs
>> + * Invokes the delegate's <code>read(byte[])</code> method, detecting and optionally skipping BOM.
>> + *
>> + * @param buf
>> + * the buffer to read the bytes into
>> + * @return the number of bytes read (excluding BOM) or -1 if the end of stream
>> + * @throws IOException
>> + * if an I/O error occurs
>> */
>> @Override
>> public int read(byte[] buf) throws IOException {
>> @@ -303,7 +359,9 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Invokes the delegate's <code>mark(int)</code> method.
>> - * @param readlimit read ahead limit
>> + *
>> + * @param readlimit
>> + * read ahead limit
>> */
>> @Override
>> public synchronized void mark(int readlimit) {
>> @@ -314,7 +372,9 @@ public class BOMInputStream extends Prox
>>
>> /**
>> * Invokes the delegate's <code>reset()</code> method.
>> - * @throws IOException if an I/O error occurs
>> + *
>> + * @throws IOException
>> + * if an I/O error occurs
>> */
>> @Override
>> public synchronized void reset() throws IOException {
>> @@ -327,11 +387,13 @@ public class BOMInputStream extends Prox
>> }
>>
>> /**
>> - * Invokes the delegate's <code>skip(long)</code> method, detecting
>> - * and optionallyskipping BOM.
>> - * @param n the number of bytes to skip
>> + * Invokes the delegate's <code>skip(long)</code> method, detecting and optionally skipping BOM.
>> + *
>> + * @param n
>> + * the number of bytes to skip
>> * @return the number of bytes to skipped or -1 if the end of stream
>> - * @throws IOException if an I/O error occurs
>> + * @throws IOException
>> + * if an I/O error occurs
>> */
>> @Override
>> public long skip(long n) throws IOException {
>>
>> Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java (original)
>> +++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java Tue Jun 5 14:48:01 2012
>> @@ -74,23 +74,36 @@ public class XmlStreamReader extends Rea
>>
>> private static final String UTF_16LE = "UTF-16LE";
>>
>> + private static final String UTF_32BE = "UTF-32BE";
>> +
>> + private static final String UTF_32LE = "UTF-32LE";
>> +
>> private static final String UTF_16 = "UTF-16";
>>
>> + private static final String UTF_32 = "UTF-32";
>> +
>> private static final String EBCDIC = "CP1047";
>>
>> private static final ByteOrderMark[] BOMS = new ByteOrderMark[] {
>> ByteOrderMark.UTF_8,
>> ByteOrderMark.UTF_16BE,
>> - ByteOrderMark.UTF_16LE
>> + ByteOrderMark.UTF_16LE,
>> + ByteOrderMark.UTF_32BE,
>> + ByteOrderMark.UTF_32LE
>> };
>> +
>> + // UTF_16LE and UTF_32LE have the same two starting BOM bytes.
>> private static final ByteOrderMark[] XML_GUESS_BYTES = new ByteOrderMark[] {
>> new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D),
>> new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F),
>> new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00),
>> + new ByteOrderMark(UTF_32BE, 0x00, 0x00, 0x00, 0x3C,
>> + 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D),
>> + new ByteOrderMark(UTF_32LE, 0x3C, 0x00, 0x00, 0x00,
>> + 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00),
>> new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94)
>> };
>>
>> -
>> private final Reader reader;
>>
>> private final String encoding;
>> @@ -532,6 +545,19 @@ public class XmlStreamReader extends Rea
>> return bomEnc;
>> }
>>
>> + // BOM is UTF-32BE or UTF-32LE
>> + if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) {
>> + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) {
>> + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
>> + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
>> + }
>> + if (xmlEnc != null && !xmlEnc.equals(UTF_32) && !xmlEnc.equals(bomEnc)) {
>> + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
>> + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
>> + }
>> + return bomEnc;
>> + }
>> +
>> // BOM is something else
>> String msg = MessageFormat.format(RAW_EX_2, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
>> throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
>> @@ -598,6 +624,24 @@ public class XmlStreamReader extends Rea
>> throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
>> }
>>
>> + // UTF-32BE or UTF-32LE content type encoding
>> + if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
>> + if (bomEnc != null) {
>> + String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
>> + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
>> + }
>> + return cTEnc;
>> + }
>> +
>> + // UTF-32 content type encoding
>> + if (cTEnc.equals(UTF_32)) {
>> + if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
>> + return bomEnc;
>> + }
>> + String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
>> + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
>> + }
>> +
>> return cTEnc;
>> }
>>
>>
>> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java (original)
>> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java Tue Jun 5 14:48:01 2012
>> @@ -31,7 +31,6 @@ import java.util.HashMap;
>> import java.util.Map;
>>
>> import org.apache.commons.io.IOUtils;
>> -import org.junit.Ignore;
>> import org.junit.Test;
>>
>> /**
>> @@ -96,13 +95,11 @@ public class XmlStreamReaderTest {
>> }
>>
>> @Test
>> - @Ignore
>> public void testRawNoBomUtf32BE() throws Exception {
>> _testRawNoBomValid("UTF-32BE");
>> }
>>
>> @Test
>> - @Ignore
>> public void testRawNoBomUtf32LE() throws Exception {
>> _testRawNoBomValid("UTF-32LE");
>> }
>> @@ -121,7 +118,7 @@ public class XmlStreamReaderTest {
>> InputStream is = getXmlStream(encoding + "-bom", XML3, encoding,
>> encoding);
>> XmlStreamReader xmlReader = new XmlStreamReader(is, false);
>> - if (!encoding.equals("UTF-16")) {
>> + if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) {
>> assertEquals(xmlReader.getEncoding(), encoding);
>> } else {
>> assertEquals(xmlReader.getEncoding()
>> @@ -135,7 +132,7 @@ public class XmlStreamReaderTest {
>> try {
>> XmlStreamReader xmlReader = new XmlStreamReader(is, false);
>> String foundEnc = xmlReader.getEncoding();
>> - fail("It should have failed for BOM " + bomEnc + ", streamEnc "
>> + fail("Expected IOException for BOM " + bomEnc + ", streamEnc "
>> + streamEnc + " and prologEnc " + prologEnc + ": found "
>> + foundEnc);
>> } catch (IOException ex) {
>> @@ -154,6 +151,9 @@ public class XmlStreamReaderTest {
>> _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
>> _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
>> _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
>> + _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
>> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
>> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
>> }
>>
>> @Test
>> @@ -168,114 +168,105 @@ public class XmlStreamReaderTest {
>> }
>>
>> @Test
>> - @Ignore
>> public void testRawBomUtf32() throws Exception {
>> _testRawBomValid("UTF-32BE");
>> _testRawBomValid("UTF-32LE");
>> _testRawBomValid("UTF-32");
>> - }
>> +
>> + _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
>> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
>> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
>> +}
>>
>>
>> @Test
>> public void testHttp() throws Exception {
>> // niallp 2010-10-06 - remove following 2 tests - I reinstated
>> - // checks for non-UTF-16 encodings (18 tests) and these failed
>> - //_testHttpValid("application/xml", "no-bom", "US-ASCII", null);
>> - //_testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
>> + // checks for non-UTF-16 encodings (18 tests) and these failed
>> + // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
>> + // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
>> _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
>> _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
>> - _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
>> - null);
>> - _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom",
>> - "UTF-8", null);
>> - _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8",
>> - null);
>> - _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
>> - "UTF-8");
>> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
>> - "UTF-16BE", null);
>> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16");
>> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16BE");
>> -
>> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", null);
>> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16");
>> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16BE");
>> + _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
>> + _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null);
>> + _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null);
>> + _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
>> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
>> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
>> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
>> +
>> + _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
>> + _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
>> + _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
>> +
>> + _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
>> + _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
>> + _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
>> +
>> _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
>> - _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8",
>> - "UTF-8");
>> - _testHttpInvalid("application/xml;charset=UTF-16", "no-bom",
>> - "UTF-16BE", "UTF-16BE");
>> + _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
>> + _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
>> + _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8");
>> + _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
>>
>> _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
>> _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
>> _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
>> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
>> - null);
>> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
>> - "UTF-16");
>> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
>> - "UTF-16BE");
>> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
>> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
>> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
>> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null);
>> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
>> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
>> _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
>>
>> - _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8",
>> - null, null);
>> - _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII",
>> - null, "US-ASCII");
>> - _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8",
>> - null, "UTF-8");
>> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
>> - null);
>> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
>> - "US-ASCII");
>> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
>> - "UTF-8");
>> -
>> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", null);
>> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16");
>> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16BE");
>> - _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE",
>> - "UTF-16BE");
>> + _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
>> + _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
>> + _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
>> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
>> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
>> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
>> +
>> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
>> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
>> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
>> + _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
>> _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);
>>
>> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
>> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
>> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
>> + _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
>> + _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null);
>> +
>> _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
>> - _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
>> - "UTF-8", "UTF-8");
>> - _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null,
>> - "UTF-8");
>> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
>> - null, "UTF-16BE");
>> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
>> - "UTF-16", "UTF-16");
>> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
>> - "UTF-16BE", "UTF-16BE");
>> + _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
>> + _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
>> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
>> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
>> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
>> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
>> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
>> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
>> _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");
>>
>> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", null, "UTF-16BE");
>> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16", "UTF-16");
>> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
>> - "UTF-16BE", "UTF-16BE", "UTF-16BE");
>> - _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE",
>> - "UTF-16BE", "UTF-16BE");
>> - _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null,
>> - "UTF-16");
>> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
>> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
>> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
>> + _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
>> + _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");
>> +
>> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
>> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
>> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
>> + _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
>> + _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32");
>>
>> - _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII",
>> - "US-ASCII");
>> + _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
>> _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
>> - _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII",
>> - "UTF-8", "UTF-8");
>> - _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII",
>> - "UTF-8", "UTF-8");
>> + _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
>> + _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
>> + _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
>> }
>>
>> @Test
>>
>> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java (original)
>> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java Tue Jun 5 14:48:01 2012
>> @@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
>> import java.io.ByteArrayInputStream;
>> import java.io.IOException;
>>
>> -import org.junit.Ignore;
>> import org.junit.Test;
>>
>> /**
>> @@ -156,12 +155,13 @@ public class XmlStreamReaderUtilitiesTes
>>
>> /** BOM calculateRawEncoding() Test */
>> @Test
>> - @Ignore
>> + //@Ignore
>> public void testCalculateRawEncodingStandardUtf32() throws IOException {
>> // Standard BOM Checks BOM Other Default
>> + testCalculateRawEncodingStandard("UTF-8", "UTF-32BE", "UTF-32LE");
>> testCalculateRawEncodingStandard("UTF-32BE", "UTF-8", "UTF-32LE");
>> testCalculateRawEncodingStandard("UTF-32LE", "UTF-8", "UTF-32BE");
>> - }
>> +}
>>
>> private void testCalculateRawEncodingStandard(String bomEnc, String otherEnc, String defaultEnc) throws IOException {
>> // Expected BOM Guess XMLEnc Default
>> @@ -178,7 +178,7 @@ public class XmlStreamReaderUtilitiesTes
>>
>> /** Additional UTF-16 calculateRawEncoding() Test */
>> @Test
>> - public void testCalculateRawEncodingAdditonalkUTF16() throws IOException {
>> + public void testCalculateRawEncodingAdditonalUTF16() throws IOException {
>> // BOM Guess XML Default
>> checkRawError(RAWMGS1, "UTF-16BE", "UTF-16", null, null);
>> checkRawEncoding("UTF-16BE", "UTF-16BE", null, "UTF-16", null);
>> @@ -192,6 +192,22 @@ public class XmlStreamReaderUtilitiesTes
>> checkRawError(RAWMGS1, "UTF-16LE", "UTF-16LE", "UTF-16BE", null);
>> }
>>
>> + /** Additional UTF-32 calculateRawEncoding() Test */
>> + @Test
>> + public void testCalculateRawEncodingAdditonalUTF32() throws IOException {
>> + // BOM Guess XML Default
>> + checkRawError(RAWMGS1, "UTF-32BE", "UTF-32", null, null);
>> + checkRawEncoding("UTF-32BE", "UTF-32BE", null, "UTF-32", null);
>> + checkRawEncoding("UTF-32BE", "UTF-32BE", "UTF-32BE", "UTF-32", null);
>> + checkRawError(RAWMGS1, "UTF-32BE", null, "UTF-32LE", null);
>> + checkRawError(RAWMGS1, "UTF-32BE", "UTF-32BE", "UTF-32LE", null);
>> + checkRawError(RAWMGS1, "UTF-32LE", "UTF-32", null, null);
>> + checkRawEncoding("UTF-32LE", "UTF-32LE", null, "UTF-32", null);
>> + checkRawEncoding("UTF-32LE", "UTF-32LE", "UTF-32LE", "UTF-32", null);
>> + checkRawError(RAWMGS1, "UTF-32LE", null, "UTF-32BE", null);
>> + checkRawError(RAWMGS1, "UTF-32LE", "UTF-32LE", "UTF-32BE", null);
>> + }
>> +
>> private void checkRawEncoding(String expected,
>> String bomEnc, String xmlGuessEnc, String xmlEnc, String defaultEncoding) throws IOException {
>> StringBuilder builder = new StringBuilder();
>> @@ -207,8 +223,7 @@ public class XmlStreamReaderUtilitiesTes
>> protected String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc,
>> String defaultEncoding) throws IOException {
>> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
>> - String encoding = mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
>> - return encoding;
>> + return mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
>> }
>>
>> private void checkRawError(String msgSuffix,
>> @@ -257,7 +272,7 @@ public class XmlStreamReaderUtilitiesTes
>>
>> /** Test calculate HTTP Encoding */
>> @Test
>> - @Ignore
>> + //@Ignore
>> public void testCalculateHttpEncodingUtf32() throws IOException {
>> // No BOM Expected Lenient cType BOM Guess XML Default
>> checkHttpEncoding("UTF-32LE", true, null, null, null, "UTF-32LE", null);
>> @@ -277,7 +292,7 @@ public class XmlStreamReaderUtilitiesTes
>> private void checkHttpEncoding(String expected, boolean lenient, String httpContentType,
>> String bomEnc, String xmlGuessEnc, String xmlEnc, String defaultEncoding) throws IOException {
>> StringBuilder builder = new StringBuilder();
>> - builder.append("HttpEncoding: ").append(bomEnc).append("], ");
>> + builder.append("HttpEncoding=[").append(bomEnc).append("], ");
>> builder.append("lenient=[").append(lenient).append("], ");
>> builder.append("httpContentType=[").append(httpContentType).append("], ");
>> builder.append("bomEnc=[").append(bomEnc).append("], ");
>> @@ -291,8 +306,7 @@ public class XmlStreamReaderUtilitiesTes
>> protected String calculateHttpEncoding(String httpContentType, String bomEnc, String xmlGuessEnc,
>> String xmlEnc, boolean lenient, String defaultEncoding) throws IOException {
>> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
>> - String encoding = mock.calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
>> - return encoding;
>> + return mock.calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
>> }
>>
>> private void checkHttpError(String msgSuffix, boolean lenient, String httpContentType,
>>
>> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java (original)
>> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java Tue Jun 5 14:48:01 2012
>> @@ -74,6 +74,12 @@ public class XmlStreamReader extends Rea
>>
>> private static final String UTF_16 = "UTF-16";
>>
>> + private static final String UTF_32BE = "UTF-32BE";
>> +
>> + private static final String UTF_32LE = "UTF-32LE";
>> +
>> + private static final String UTF_32 = "UTF-32";
>> +
>> private static final String EBCDIC = "CP1047";
>>
>> private static String staticDefaultEncoding = null;
>> @@ -447,6 +453,10 @@ public class XmlStreamReader extends Rea
>> && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc
>> .equals(UTF_16LE))) {
>> encoding = xmlGuessEnc;
>> + } else if (xmlEnc.equals(UTF_32)
>> + && (xmlGuessEnc.equals(UTF_32BE) || xmlGuessEnc
>> + .equals(UTF_32LE))) {
>> + encoding = xmlGuessEnc;
>> } else {
>> encoding = xmlEnc;
>> }
>> @@ -474,6 +484,18 @@ public class XmlStreamReader extends Rea
>> bomEnc, xmlGuessEnc, xmlEnc, is);
>> }
>> encoding = bomEnc;
>> + } else if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) {
>> + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) {
>> + throw new XmlStreamReaderException(RAW_EX_1.format(new Object[] { bomEnc,
>> + xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc, xmlEnc, is);
>> + }
>> + if (xmlEnc != null && !xmlEnc.equals(UTF_32)
>> + && !xmlEnc.equals(bomEnc)) {
>> + throw new XmlStreamReaderException(RAW_EX_1
>> + .format(new Object[] { bomEnc, xmlGuessEnc, xmlEnc }),
>> + bomEnc, xmlGuessEnc, xmlEnc, is);
>> + }
>> + encoding = bomEnc;
>> } else {
>> throw new XmlStreamReaderException(RAW_EX_2.format(new Object[] {
>> bomEnc, xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc,
>> @@ -516,6 +538,21 @@ public class XmlStreamReader extends Rea
>> xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
>> bomEnc, xmlGuessEnc, xmlEnc, is);
>> }
>> + } else if (bomEnc != null
>> + && (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE))) {
>> + throw new XmlStreamReaderException(HTTP_EX_1
>> + .format(new Object[] { cTMime, cTEnc, bomEnc,
>> + xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
>> + bomEnc, xmlGuessEnc, xmlEnc, is);
>> + } else if (cTEnc.equals(UTF_32)) {
>> + if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
>> + encoding = bomEnc;
>> + } else {
>> + throw new XmlStreamReaderException(HTTP_EX_2
>> + .format(new Object[] { cTMime, cTEnc, bomEnc,
>> + xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
>> + bomEnc, xmlGuessEnc, xmlEnc, is);
>> + }
>> } else {
>> encoding = cTEnc;
>> }
>>
>> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java
>> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
>> ==============================================================================
>> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java (original)
>> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java Tue Jun 5 14:48:01 2012
>> @@ -36,11 +36,10 @@ public class XmlStreamReaderUtilitiesCom
>> protected String calculateHttpEncoding(String httpContentType, String bomEnc, String xmlGuessEnc,
>> String xmlEnc, boolean lenient, String defaultEncoding) throws IOException {
>> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
>> - String encoding = mock.calculateHttpEncoding(
>> + return mock.calculateHttpEncoding(
>> XmlStreamReader.getContentTypeMime(httpContentType),
>> XmlStreamReader.getContentTypeEncoding(httpContentType),
>> bomEnc, xmlGuessEnc, xmlEnc, null, lenient);
>> - return encoding;
>> }
>>
>> /** Mock {@link XmlStreamReader} implementation */
>>
>>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
> For additional commands, e-mail: dev-help@commons.apache.org
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org
Re: svn commit: r1346400 - in /commons/proper/io/trunk/src: changes/
main/java/org/apache/commons/io/input/ test/java/org/apache/commons/io/input/ test/java/org/apache/commons/io/input/compatibility/
Posted by sebb <se...@gmail.com>.
On 5 June 2012 15:48, <gg...@apache.org> wrote:
> Author: ggregory
> Date: Tue Jun 5 14:48:01 2012
> New Revision: 1346400
>
> URL: http://svn.apache.org/viewvc?rev=1346400&view=rev
> Log:
> [IO-320] Add XmlStreamReader support for UTF-32.
> [IO-331] BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
Please try to keep commits to a single fix.
>
> Modified:
> commons/proper/io/trunk/src/changes/changes.xml
> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java
> commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java
> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
> commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java
>
> Modified: commons/proper/io/trunk/src/changes/changes.xml
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/changes/changes.xml (original)
> +++ commons/proper/io/trunk/src/changes/changes.xml Tue Jun 5 14:48:01 2012
> @@ -47,6 +47,12 @@ The <action> type attribute can be add,u
> <body>
> <!-- The release date is the date RC is cut -->
> <release version="2.4" date="2012-TDB-TDB" description="">
> + <action issue="IO-320" dev="ggregory" type="add">
> + Add XmlStreamReader support for UTF-32.
> + </action>
> + <action issue="IO-331" dev="ggregory" type="add">
> + BOMInputStream wrongly detects UTF-32LE_BOM files as UTF-16LE_BOM files in method getBOM().
> + </action>
> <action issue="IO-332" dev="ggregory" type="fix" due-to="liangly">
> Improve tailer's reading performance.
> </action>
>
> Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java (original)
> +++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/BOMInputStream.java Tue Jun 5 14:48:01 2012
> @@ -19,54 +19,66 @@ package org.apache.commons.io.input;
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.Arrays;
> +import java.util.Comparator;
> import java.util.List;
>
> import org.apache.commons.io.ByteOrderMark;
>
> /**
> - * This class is used to wrap a stream that includes an encoded
> - * {@link ByteOrderMark} as its first bytes.
> - *
> - * This class detects these bytes and, if required, can automatically skip them
> - * and return the subsequent byte as the first byte in the stream.
> - *
> + * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes.
> + *
> + * This class detects these bytes and, if required, can automatically skip them and return the subsequent byte as the
> + * first byte in the stream.
> + *
> * The {@link ByteOrderMark} implementation has the following pre-defined BOMs:
> * <ul>
> - * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
> - * <li>UTF-16BE - {@link ByteOrderMark#UTF_16LE}</li>
> - * <li>UTF-16LE - {@link ByteOrderMark#UTF_16BE}</li>
> + * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
> + * <li>UTF-16BE - {@link ByteOrderMark#UTF_16BE}</li>
> + * <li>UTF-16LE - {@link ByteOrderMark#UTF_16LE}</li>
> + * <li>UTF-32BE - {@link ByteOrderMark#UTF_32BE}</li>
> + * <li>UTF-32LE - {@link ByteOrderMark#UTF_32LE}</li>
> * </ul>
> - *
> - *
> + *
> + *
> * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
> + *
> * <pre>
> - * BOMInputStream bomIn = new BOMInputStream(in);
> - * if (bomIn.hasBOM()) {
> - * // has a UTF-8 BOM
> - * }
> + * BOMInputStream bomIn = new BOMInputStream(in);
> + * if (bomIn.hasBOM()) {
> + * // has a UTF-8 BOM
> + * }
> * </pre>
> - *
> + *
> * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
> + *
> * <pre>
> - * boolean include = true;
> - * BOMInputStream bomIn = new BOMInputStream(in, include);
> - * if (bomIn.hasBOM()) {
> - * // has a UTF-8 BOM
> - * }
> + * boolean include = true;
> + * BOMInputStream bomIn = new BOMInputStream(in, include);
> + * if (bomIn.hasBOM()) {
> + * // has a UTF-8 BOM
> + * }
> * </pre>
> - *
> + *
> * <h3>Example 3 - Detect Multiple BOMs</h3>
> + *
> * <pre>
> - * BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
> - * if (bomIn.hasBOM() == false) {
> - * // No BOM found
> - * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
> - * // has a UTF-16LE BOM
> - * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
> - * // has a UTF-16BE BOM
> - * }
> + * BOMInputStream bomIn = new BOMInputStream(in,
> + * ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
> + * ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
> + * );
> + * if (bomIn.hasBOM() == false) {
> + * // No BOM found
> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
> + * // has a UTF-16LE BOM
> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
> + * // has a UTF-16BE BOM
> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) {
> + * // has a UTF-32LE BOM
> + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) {
> + * // has a UTF-32BE BOM
> + * }
> * </pre>
> - *
> + *
> * @see org.apache.commons.io.ByteOrderMark
> * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
> * @version $Id$
> @@ -74,6 +86,9 @@ import org.apache.commons.io.ByteOrderMa
> */
> public class BOMInputStream extends ProxyInputStream {
> private final boolean include;
> + /**
> + * BOMs are sorted from longest to shortest.
> + */
> private final List<ByteOrderMark> boms;
> private ByteOrderMark byteOrderMark;
> private int[] firstBytes;
> @@ -83,42 +98,66 @@ public class BOMInputStream extends Prox
> private boolean markedAtStart;
>
> /**
> - * Constructs a new BOM InputStream that excludes
> - * a {@link ByteOrderMark#UTF_8} BOM.
> - * @param delegate the InputStream to delegate to
> + * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM.
> + *
> + * @param delegate
> + * the InputStream to delegate to
> */
> public BOMInputStream(InputStream delegate) {
> this(delegate, false, ByteOrderMark.UTF_8);
> }
>
> /**
> - * Constructs a new BOM InputStream that detects a
> - * a {@link ByteOrderMark#UTF_8} and optionally includes it.
> - * @param delegate the InputStream to delegate to
> - * @param include true to include the UTF-8 BOM or
> - * false to exclude it
> + * Constructs a new BOM InputStream that detects a {@link ByteOrderMark#UTF_8} and optionally includes it.
> + *
> + * @param delegate
> + * the InputStream to delegate to
> + * @param include
> + * true to include the UTF-8 BOM or false to exclude it
> */
> public BOMInputStream(InputStream delegate, boolean include) {
> this(delegate, include, ByteOrderMark.UTF_8);
> }
>
> /**
> - * Constructs a new BOM InputStream that excludes
> - * the specified BOMs.
> - * @param delegate the InputStream to delegate to
> - * @param boms The BOMs to detect and exclude
> + * Constructs a new BOM InputStream that excludes the specified BOMs.
> + *
> + * @param delegate
> + * the InputStream to delegate to
> + * @param boms
> + * The BOMs to detect and exclude
> */
> public BOMInputStream(InputStream delegate, ByteOrderMark... boms) {
> this(delegate, false, boms);
> }
>
> /**
> - * Constructs a new BOM InputStream that detects the
> - * specified BOMs and optionally includes them.
> - * @param delegate the InputStream to delegate to
> - * @param include true to include the specified BOMs or
> - * false to exclude them
> - * @param boms The BOMs to detect and optionally exclude
> + * Compares ByteOrderMark objects in descending length order.
> + */
> + private static final Comparator<ByteOrderMark> ByteOrderMarkLengthComparator = new Comparator<ByteOrderMark>() {
> +
> + public int compare(ByteOrderMark bom1, ByteOrderMark bom2) {
> + int len1 = bom1.length();
> + int len2 = bom2.length();
> + if (len1 > len2) {
> + return -1;
> + }
> + if (len2 > len1) {
> + return 1;
> + }
> + return 0;
> + }
> + };
> +
> + /**
> + * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them.
> + *
> + * @param delegate
> + * the InputStream to delegate to
> + * @param include
> + * true to include the specified BOMs or false to exclude them
> + * @param boms
> + * The BOMs to detect and optionally exclude
> */
> public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
> super(delegate);
> @@ -126,15 +165,18 @@ public class BOMInputStream extends Prox
> throw new IllegalArgumentException("No BOMs specified");
> }
> this.include = include;
> + // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes.
> + Arrays.sort(boms, ByteOrderMarkLengthComparator);
> this.boms = Arrays.asList(boms);
> +
> }
>
> /**
> * Indicates whether the stream contains one of the specified BOMs.
> - *
> - * @return true if the stream has one of the specified BOMs, otherwise false
> - * if it does not
> - * @throws IOException if an error reading the first bytes of the stream occurs
> + *
> + * @return true if the stream has one of the specified BOMs, otherwise false if it does not
> + * @throws IOException
> + * if an error reading the first bytes of the stream occurs
> */
> public boolean hasBOM() throws IOException {
> return getBOM() != null;
> @@ -142,13 +184,14 @@ public class BOMInputStream extends Prox
>
> /**
> * Indicates whether the stream contains the specified BOM.
> - *
> - * @param bom The BOM to check for
> - * @return true if the stream has the specified BOM, otherwise false
> - * if it does not
> - * @throws IllegalArgumentException if the BOM is not one the stream
> - * is configured to detect
> - * @throws IOException if an error reading the first bytes of the stream occurs
> + *
> + * @param bom
> + * The BOM to check for
> + * @return true if the stream has the specified BOM, otherwise false if it does not
> + * @throws IllegalArgumentException
> + * if the BOM is not one the stream is configured to detect
> + * @throws IOException
> + * if an error reading the first bytes of the stream occurs
> */
> public boolean hasBOM(ByteOrderMark bom) throws IOException {
> if (!boms.contains(bom)) {
> @@ -159,31 +202,34 @@ public class BOMInputStream extends Prox
>
> /**
> * Return the BOM (Byte Order Mark).
> - *
> + *
> * @return The BOM or null if none
> - * @throws IOException if an error reading the first bytes of the stream occurs
> + * @throws IOException
> + * if an error reading the first bytes of the stream occurs
> */
> public ByteOrderMark getBOM() throws IOException {
> if (firstBytes == null) {
> fbLength = 0;
> - int max = 0;
> - for (ByteOrderMark bom : boms) {
> - max = Math.max(max, bom.length());
> - }
> - firstBytes = new int[max];
> + // BOMs are sorted from longest to shortest
> + final int maxBomSize = boms.get(0).length();
> + firstBytes = new int[maxBomSize];
> + // Read first maxBomSize bytes
> for (int i = 0; i < firstBytes.length; i++) {
> firstBytes[i] = in.read();
> fbLength++;
> if (firstBytes[i] < 0) {
> break;
> }
> -
> - byteOrderMark = find();
> - if (byteOrderMark != null) {
> - if (!include) {
> + }
> + // match BOM in firstBytes
> + byteOrderMark = find();
> + if (byteOrderMark != null) {
> + if (!include) {
> + if (byteOrderMark.length() < firstBytes.length) {
> + fbIndex = byteOrderMark.length();
> + } else {
> fbLength = 0;
> }
> - break;
> }
> }
> }
> @@ -192,9 +238,10 @@ public class BOMInputStream extends Prox
>
> /**
> * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}.
> - *
> + *
> * @return The BOM charset Name or null if no BOM found
> - * @throws IOException if an error reading the first bytes of the stream occurs
> + * @throws IOException
> + * if an error reading the first bytes of the stream occurs
> *
> */
> public String getBOMCharsetName() throws IOException {
> @@ -203,12 +250,13 @@ public class BOMInputStream extends Prox
> }
>
> /**
> - * This method reads and either preserves or skips the first bytes in the
> - * stream. It behaves like the single-byte <code>read()</code> method,
> - * either returning a valid byte or -1 to indicate that the initial bytes
> - * have been processed already.
> + * This method reads and either preserves or skips the first bytes in the stream. It behaves like the single-byte
> + * <code>read()</code> method, either returning a valid byte or -1 to indicate that the initial bytes have been
> + * processed already.
> + *
> * @return the byte read (excluding BOM) or -1 if the end of stream
> - * @throws IOException if an I/O error occurs
> + * @throws IOException
> + * if an I/O error occurs
> */
> private int readFirstBytes() throws IOException {
> getBOM();
> @@ -217,7 +265,7 @@ public class BOMInputStream extends Prox
>
> /**
> * Find a BOM with the specified bytes.
> - *
> + *
> * @return The matched BOM or null if none matched
> */
> private ByteOrderMark find() {
> @@ -231,14 +279,16 @@ public class BOMInputStream extends Prox
>
> /**
> * Check if the bytes match a BOM.
> - *
> - * @param bom The BOM
> + *
> + * @param bom
> + * The BOM
> * @return true if the bytes match the bom, otherwise false
> */
> private boolean matches(ByteOrderMark bom) {
> - if (bom.length() != fbLength) {
> - return false;
> - }
> + // if (bom.length() != fbLength) {
> + // return false;
> + // }
> + // firstBytes may be bigger than the BOM bytes
> for (int i = 0; i < bom.length(); i++) {
> if (bom.get(i) != firstBytes[i]) {
> return false;
> @@ -247,15 +297,16 @@ public class BOMInputStream extends Prox
> return true;
> }
>
> - //----------------------------------------------------------------------------
> - // Implementation of InputStream
> - //----------------------------------------------------------------------------
> + // ----------------------------------------------------------------------------
> + // Implementation of InputStream
> + // ----------------------------------------------------------------------------
>
> /**
> - * Invokes the delegate's <code>read()</code> method, detecting and
> - * optionally skipping BOM.
> + * Invokes the delegate's <code>read()</code> method, detecting and optionally skipping BOM.
> + *
> * @return the byte read (excluding BOM) or -1 if the end of stream
> - * @throws IOException if an I/O error occurs
> + * @throws IOException
> + * if an I/O error occurs
> */
> @Override
> public int read() throws IOException {
> @@ -264,13 +315,17 @@ public class BOMInputStream extends Prox
> }
>
> /**
> - * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting
> - * and optionally skipping BOM.
> - * @param buf the buffer to read the bytes into
> - * @param off The start offset
> - * @param len The number of bytes to read (excluding BOM)
> + * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting and optionally skipping BOM.
> + *
> + * @param buf
> + * the buffer to read the bytes into
> + * @param off
> + * The start offset
> + * @param len
> + * The number of bytes to read (excluding BOM)
> * @return the number of bytes read or -1 if the end of stream
> - * @throws IOException if an I/O error occurs
> + * @throws IOException
> + * if an I/O error occurs
> */
> @Override
> public int read(byte[] buf, int off, int len) throws IOException {
> @@ -289,12 +344,13 @@ public class BOMInputStream extends Prox
> }
>
> /**
> - * Invokes the delegate's <code>read(byte[])</code> method, detecting and
> - * optionally skipping BOM.
> - * @param buf the buffer to read the bytes into
> - * @return the number of bytes read (excluding BOM)
> - * or -1 if the end of stream
> - * @throws IOException if an I/O error occurs
> + * Invokes the delegate's <code>read(byte[])</code> method, detecting and optionally skipping BOM.
> + *
> + * @param buf
> + * the buffer to read the bytes into
> + * @return the number of bytes read (excluding BOM) or -1 if the end of stream
> + * @throws IOException
> + * if an I/O error occurs
> */
> @Override
> public int read(byte[] buf) throws IOException {
> @@ -303,7 +359,9 @@ public class BOMInputStream extends Prox
>
> /**
> * Invokes the delegate's <code>mark(int)</code> method.
> - * @param readlimit read ahead limit
> + *
> + * @param readlimit
> + * read ahead limit
> */
> @Override
> public synchronized void mark(int readlimit) {
> @@ -314,7 +372,9 @@ public class BOMInputStream extends Prox
>
> /**
> * Invokes the delegate's <code>reset()</code> method.
> - * @throws IOException if an I/O error occurs
> + *
> + * @throws IOException
> + * if an I/O error occurs
> */
> @Override
> public synchronized void reset() throws IOException {
> @@ -327,11 +387,13 @@ public class BOMInputStream extends Prox
> }
>
> /**
> - * Invokes the delegate's <code>skip(long)</code> method, detecting
> - * and optionallyskipping BOM.
> - * @param n the number of bytes to skip
> + * Invokes the delegate's <code>skip(long)</code> method, detecting and optionally skipping BOM.
> + *
> + * @param n
> + * the number of bytes to skip
> * @return the number of bytes skipped or -1 if the end of stream
> - * @throws IOException if an I/O error occurs
> + * @throws IOException
> + * if an I/O error occurs
> */
> @Override
> public long skip(long n) throws IOException {
>
> Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java (original)
> +++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java Tue Jun 5 14:48:01 2012
> @@ -74,23 +74,36 @@ public class XmlStreamReader extends Rea
>
> private static final String UTF_16LE = "UTF-16LE";
>
> + private static final String UTF_32BE = "UTF-32BE";
> +
> + private static final String UTF_32LE = "UTF-32LE";
> +
> private static final String UTF_16 = "UTF-16";
>
> + private static final String UTF_32 = "UTF-32";
> +
> private static final String EBCDIC = "CP1047";
>
> private static final ByteOrderMark[] BOMS = new ByteOrderMark[] {
> ByteOrderMark.UTF_8,
> ByteOrderMark.UTF_16BE,
> - ByteOrderMark.UTF_16LE
> + ByteOrderMark.UTF_16LE,
> + ByteOrderMark.UTF_32BE,
> + ByteOrderMark.UTF_32LE
> };
> +
> + // UTF_16LE and UTF_32LE have the same two starting BOM bytes.
> private static final ByteOrderMark[] XML_GUESS_BYTES = new ByteOrderMark[] {
> new ByteOrderMark(UTF_8, 0x3C, 0x3F, 0x78, 0x6D),
> new ByteOrderMark(UTF_16BE, 0x00, 0x3C, 0x00, 0x3F),
> new ByteOrderMark(UTF_16LE, 0x3C, 0x00, 0x3F, 0x00),
> + new ByteOrderMark(UTF_32BE, 0x00, 0x00, 0x00, 0x3C,
> + 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D),
> + new ByteOrderMark(UTF_32LE, 0x3C, 0x00, 0x00, 0x00,
> + 0x3F, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00),
> new ByteOrderMark(EBCDIC, 0x4C, 0x6F, 0xA7, 0x94)
> };
>
> -
> private final Reader reader;
>
> private final String encoding;
> @@ -532,6 +545,19 @@ public class XmlStreamReader extends Rea
> return bomEnc;
> }
>
> + // BOM is UTF-32BE or UTF-32LE
> + if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) {
> + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) {
> + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
> + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
> + }
> + if (xmlEnc != null && !xmlEnc.equals(UTF_32) && !xmlEnc.equals(bomEnc)) {
> + String msg = MessageFormat.format(RAW_EX_1, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
> + throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
> + }
> + return bomEnc;
> + }
> +
> // BOM is something else
> String msg = MessageFormat.format(RAW_EX_2, new Object[] { bomEnc, xmlGuessEnc, xmlEnc });
> throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
> @@ -598,6 +624,24 @@ public class XmlStreamReader extends Rea
> throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
> }
>
> + // UTF-32BE or UTF-32LE content type encoding
> + if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
> + if (bomEnc != null) {
> + String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
> + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
> + }
> + return cTEnc;
> + }
> +
> + // UTF-32 content type encoding
> + if (cTEnc.equals(UTF_32)) {
> + if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
> + return bomEnc;
> + }
> + String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
> + throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
> + }
> +
> return cTEnc;
> }
>
>
> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java (original)
> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java Tue Jun 5 14:48:01 2012
> @@ -31,7 +31,6 @@ import java.util.HashMap;
> import java.util.Map;
>
> import org.apache.commons.io.IOUtils;
> -import org.junit.Ignore;
> import org.junit.Test;
>
> /**
> @@ -96,13 +95,11 @@ public class XmlStreamReaderTest {
> }
>
> @Test
> - @Ignore
> public void testRawNoBomUtf32BE() throws Exception {
> _testRawNoBomValid("UTF-32BE");
> }
>
> @Test
> - @Ignore
> public void testRawNoBomUtf32LE() throws Exception {
> _testRawNoBomValid("UTF-32LE");
> }
> @@ -121,7 +118,7 @@ public class XmlStreamReaderTest {
> InputStream is = getXmlStream(encoding + "-bom", XML3, encoding,
> encoding);
> XmlStreamReader xmlReader = new XmlStreamReader(is, false);
> - if (!encoding.equals("UTF-16")) {
> + if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) {
> assertEquals(xmlReader.getEncoding(), encoding);
> } else {
> assertEquals(xmlReader.getEncoding()
> @@ -135,7 +132,7 @@ public class XmlStreamReaderTest {
> try {
> XmlStreamReader xmlReader = new XmlStreamReader(is, false);
> String foundEnc = xmlReader.getEncoding();
> - fail("It should have failed for BOM " + bomEnc + ", streamEnc "
> + fail("Expected IOException for BOM " + bomEnc + ", streamEnc "
> + streamEnc + " and prologEnc " + prologEnc + ": found "
> + foundEnc);
> } catch (IOException ex) {
> @@ -154,6 +151,9 @@ public class XmlStreamReaderTest {
> _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
> _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
> _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
> + _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
> }
>
> @Test
> @@ -168,114 +168,105 @@ public class XmlStreamReaderTest {
> }
>
> @Test
> - @Ignore
> public void testRawBomUtf32() throws Exception {
> _testRawBomValid("UTF-32BE");
> _testRawBomValid("UTF-32LE");
> _testRawBomValid("UTF-32");
> - }
> +
> + _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
> + _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
> +}
>
>
> @Test
> public void testHttp() throws Exception {
> // niallp 2010-10-06 - remove following 2 tests - I reinstated
> - // checks for non-UTF-16 encodings (18 tests) and these failed
> - //_testHttpValid("application/xml", "no-bom", "US-ASCII", null);
> - //_testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
> + // checks for non-UTF-16 encodings (18 tests) and these failed
> + // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
> + // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
> _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
> _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
> - _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
> - null);
> - _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom",
> - "UTF-8", null);
> - _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8",
> - null);
> - _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
> - "UTF-8");
> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
> - "UTF-16BE", null);
> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16");
> - _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16BE");
> -
> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", null);
> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16");
> - _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16BE");
> + _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
> + _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null);
> + _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null);
> + _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
> + _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
> +
> + _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
> + _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
> + _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
> +
> + _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
> + _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
> + _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
> +
> _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
> - _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8",
> - "UTF-8");
> - _testHttpInvalid("application/xml;charset=UTF-16", "no-bom",
> - "UTF-16BE", "UTF-16BE");
> + _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
> + _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
> + _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8");
> + _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
>
> _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
> _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
> _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
> - null);
> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
> - "UTF-16");
> - _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
> - "UTF-16BE");
> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
> + _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null);
> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
> + _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
> _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
>
> - _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8",
> - null, null);
> - _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII",
> - null, "US-ASCII");
> - _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8",
> - null, "UTF-8");
> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
> - null);
> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
> - "US-ASCII");
> - _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null,
> - "UTF-8");
> -
> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", null);
> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16");
> - _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16BE");
> - _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE",
> - "UTF-16BE");
> + _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
> + _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
> + _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
> + _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
> +
> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
> + _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
> + _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
> _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);
>
> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
> + _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
> + _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
> + _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null);
> +
> _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
> - _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
> - "UTF-8", "UTF-8");
> - _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null,
> - "UTF-8");
> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
> - null, "UTF-16BE");
> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
> - "UTF-16", "UTF-16");
> - _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE",
> - "UTF-16BE", "UTF-16BE");
> + _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
> + _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
> + _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
> + _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
> _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");
>
> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", null, "UTF-16BE");
> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16", "UTF-16");
> - _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
> - "UTF-16BE", "UTF-16BE", "UTF-16BE");
> - _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE",
> - "UTF-16BE", "UTF-16BE");
> - _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null,
> - "UTF-16");
> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
> + _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
> + _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
> + _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");
> +
> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
> + _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
> + _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
> + _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32");
>
> - _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII",
> - "US-ASCII");
> + _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
> _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
> - _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII",
> - "UTF-8", "UTF-8");
> - _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII",
> - "UTF-8", "UTF-8");
> + _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
> + _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
> + _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
> }
>
> @Test
>
> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java (original)
> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderUtilitiesTest.java Tue Jun 5 14:48:01 2012
> @@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
> import java.io.ByteArrayInputStream;
> import java.io.IOException;
>
> -import org.junit.Ignore;
> import org.junit.Test;
>
> /**
> @@ -156,12 +155,13 @@ public class XmlStreamReaderUtilitiesTes
>
> /** BOM calculateRawEncoding() Test */
> @Test
> - @Ignore
> + //@Ignore
> public void testCalculateRawEncodingStandardUtf32() throws IOException {
> // Standard BOM Checks BOM Other Default
> + testCalculateRawEncodingStandard("UTF-8", "UTF-32BE", "UTF-32LE");
> testCalculateRawEncodingStandard("UTF-32BE", "UTF-8", "UTF-32LE");
> testCalculateRawEncodingStandard("UTF-32LE", "UTF-8", "UTF-32BE");
> - }
> +}
>
> private void testCalculateRawEncodingStandard(String bomEnc, String otherEnc, String defaultEnc) throws IOException {
> // Expected BOM Guess XMLEnc Default
> @@ -178,7 +178,7 @@ public class XmlStreamReaderUtilitiesTes
>
> /** Additional UTF-16 calculateRawEncoding() Test */
> @Test
> - public void testCalculateRawEncodingAdditonalkUTF16() throws IOException {
> + public void testCalculateRawEncodingAdditonalUTF16() throws IOException {
> // BOM Guess XML Default
> checkRawError(RAWMGS1, "UTF-16BE", "UTF-16", null, null);
> checkRawEncoding("UTF-16BE", "UTF-16BE", null, "UTF-16", null);
> @@ -192,6 +192,22 @@ public class XmlStreamReaderUtilitiesTes
> checkRawError(RAWMGS1, "UTF-16LE", "UTF-16LE", "UTF-16BE", null);
> }
>
> + /** Additional UTF-32 calculateRawEncoding() Test */
> + @Test
> + public void testCalculateRawEncodingAdditonalUTF32() throws IOException {
> + // BOM Guess XML Default
> + checkRawError(RAWMGS1, "UTF-32BE", "UTF-32", null, null);
> + checkRawEncoding("UTF-32BE", "UTF-32BE", null, "UTF-32", null);
> + checkRawEncoding("UTF-32BE", "UTF-32BE", "UTF-32BE", "UTF-32", null);
> + checkRawError(RAWMGS1, "UTF-32BE", null, "UTF-32LE", null);
> + checkRawError(RAWMGS1, "UTF-32BE", "UTF-32BE", "UTF-32LE", null);
> + checkRawError(RAWMGS1, "UTF-32LE", "UTF-32", null, null);
> + checkRawEncoding("UTF-32LE", "UTF-32LE", null, "UTF-32", null);
> + checkRawEncoding("UTF-32LE", "UTF-32LE", "UTF-32LE", "UTF-32", null);
> + checkRawError(RAWMGS1, "UTF-32LE", null, "UTF-32BE", null);
> + checkRawError(RAWMGS1, "UTF-32LE", "UTF-32LE", "UTF-32BE", null);
> + }
> +
> private void checkRawEncoding(String expected,
> String bomEnc, String xmlGuessEnc, String xmlEnc, String defaultEncoding) throws IOException {
> StringBuilder builder = new StringBuilder();
> @@ -207,8 +223,7 @@ public class XmlStreamReaderUtilitiesTes
> protected String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc,
> String defaultEncoding) throws IOException {
> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
> - String encoding = mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
> - return encoding;
> + return mock.calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
> }
>
> private void checkRawError(String msgSuffix,
> @@ -257,7 +272,7 @@ public class XmlStreamReaderUtilitiesTes
>
> /** Test calculate HTTP Encoding */
> @Test
> - @Ignore
> + //@Ignore
> public void testCalculateHttpEncodingUtf32() throws IOException {
> // No BOM Expected Lenient cType BOM Guess XML Default
> checkHttpEncoding("UTF-32LE", true, null, null, null, "UTF-32LE", null);
> @@ -277,7 +292,7 @@ public class XmlStreamReaderUtilitiesTes
> private void checkHttpEncoding(String expected, boolean lenient, String httpContentType,
> String bomEnc, String xmlGuessEnc, String xmlEnc, String defaultEncoding) throws IOException {
> StringBuilder builder = new StringBuilder();
> - builder.append("HttpEncoding: ").append(bomEnc).append("], ");
> + builder.append("HttpEncoding=[").append(bomEnc).append("], ");
> builder.append("lenient=[").append(lenient).append("], ");
> builder.append("httpContentType=[").append(httpContentType).append("], ");
> builder.append("bomEnc=[").append(bomEnc).append("], ");
> @@ -291,8 +306,7 @@ public class XmlStreamReaderUtilitiesTes
> protected String calculateHttpEncoding(String httpContentType, String bomEnc, String xmlGuessEnc,
> String xmlEnc, boolean lenient, String defaultEncoding) throws IOException {
> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
> - String encoding = mock.calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
> - return encoding;
> + return mock.calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
> }
>
> private void checkHttpError(String msgSuffix, boolean lenient, String httpContentType,
>
> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java (original)
> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java Tue Jun 5 14:48:01 2012
> @@ -74,6 +74,12 @@ public class XmlStreamReader extends Rea
>
> private static final String UTF_16 = "UTF-16";
>
> + private static final String UTF_32BE = "UTF-32BE";
> +
> + private static final String UTF_32LE = "UTF-32LE";
> +
> + private static final String UTF_32 = "UTF-32";
> +
> private static final String EBCDIC = "CP1047";
>
> private static String staticDefaultEncoding = null;
> @@ -447,6 +453,10 @@ public class XmlStreamReader extends Rea
> && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc
> .equals(UTF_16LE))) {
> encoding = xmlGuessEnc;
> + } else if (xmlEnc.equals(UTF_32)
> + && (xmlGuessEnc.equals(UTF_32BE) || xmlGuessEnc
> + .equals(UTF_32LE))) {
> + encoding = xmlGuessEnc;
> } else {
> encoding = xmlEnc;
> }
> @@ -474,6 +484,18 @@ public class XmlStreamReader extends Rea
> bomEnc, xmlGuessEnc, xmlEnc, is);
> }
> encoding = bomEnc;
> + } else if (bomEnc.equals(UTF_32BE) || bomEnc.equals(UTF_32LE)) {
> + if (xmlGuessEnc != null && !xmlGuessEnc.equals(bomEnc)) {
> + throw new XmlStreamReaderException(RAW_EX_1.format(new Object[] { bomEnc,
> + xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc, xmlEnc, is);
> + }
> + if (xmlEnc != null && !xmlEnc.equals(UTF_32)
> + && !xmlEnc.equals(bomEnc)) {
> + throw new XmlStreamReaderException(RAW_EX_1
> + .format(new Object[] { bomEnc, xmlGuessEnc, xmlEnc }),
> + bomEnc, xmlGuessEnc, xmlEnc, is);
> + }
> + encoding = bomEnc;
> } else {
> throw new XmlStreamReaderException(RAW_EX_2.format(new Object[] {
> bomEnc, xmlGuessEnc, xmlEnc }), bomEnc, xmlGuessEnc,
> @@ -516,6 +538,21 @@ public class XmlStreamReader extends Rea
> xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
> bomEnc, xmlGuessEnc, xmlEnc, is);
> }
> + } else if (bomEnc != null
> + && (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE))) {
> + throw new XmlStreamReaderException(HTTP_EX_1
> + .format(new Object[] { cTMime, cTEnc, bomEnc,
> + xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
> + bomEnc, xmlGuessEnc, xmlEnc, is);
> + } else if (cTEnc.equals(UTF_32)) {
> + if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
> + encoding = bomEnc;
> + } else {
> + throw new XmlStreamReaderException(HTTP_EX_2
> + .format(new Object[] { cTMime, cTEnc, bomEnc,
> + xmlGuessEnc, xmlEnc }), cTMime, cTEnc,
> + bomEnc, xmlGuessEnc, xmlEnc, is);
> + }
> } else {
> encoding = cTEnc;
> }
>
> Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java?rev=1346400&r1=1346399&r2=1346400&view=diff
> ==============================================================================
> --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java (original)
> +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReaderUtilitiesCompatibilityTest.java Tue Jun 5 14:48:01 2012
> @@ -36,11 +36,10 @@ public class XmlStreamReaderUtilitiesCom
> protected String calculateHttpEncoding(String httpContentType, String bomEnc, String xmlGuessEnc,
> String xmlEnc, boolean lenient, String defaultEncoding) throws IOException {
> MockXmlStreamReader mock = new MockXmlStreamReader(defaultEncoding);
> - String encoding = mock.calculateHttpEncoding(
> + return mock.calculateHttpEncoding(
> XmlStreamReader.getContentTypeMime(httpContentType),
> XmlStreamReader.getContentTypeEncoding(httpContentType),
> bomEnc, xmlGuessEnc, xmlEnc, null, lenient);
> - return encoding;
> }
>
> /** Mock {@link XmlStreamReader} implementation */
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org