You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ni...@apache.org on 2010/10/04 01:44:20 UTC
svn commit: r1004073 - in /commons/proper/io/trunk/src:
java/org/apache/commons/io/ java/org/apache/commons/io/input/
test/org/apache/commons/io/ test/org/apache/commons/io/input/
Author: niallp
Date: Sun Oct 3 23:44:19 2010
New Revision: 1004073
URL: http://svn.apache.org/viewvc?rev=1004073&view=rev
Log:
IO-178 Extend BOMInputStream from excluding UTF-8 BOMs to detecting and optionally excluding any BOM
- Rename BOMExclusionInputStream to BOMInputStream
- Add new ByteOrderMark implementation
- Enhance BOMInputStream from only processing UTF-8 BOMs to be able to configure for any BOM
- Provide ability to get/check the BOM detected
- Provide facility to either include or exclude the detected BOM
Added:
commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java (with props)
commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java
- copied, changed from r1003693, commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java
commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java (with props)
commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java
- copied, changed from r1003693, commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java
Removed:
commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java
commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java
Added: commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java?rev=1004073&view=auto
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java (added)
+++ commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java Sun Oct 3 23:44:19 2010
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io;
+
+import java.io.Serializable;
+
+/**
+ * Byte Order Mark (BOM) representation: a charset name together with the
+ * byte values that identify that charset at the start of a stream.
+ * <p>
+ * Instances cannot be modified after construction: the byte values are
+ * copied on the way in and {@link #getBytes()} hands out a fresh array.
+ *
+ * @see org.apache.commons.io.input.BOMInputStream
+ * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
+ * @version $Id$
+ * @since Commons IO 2.0
+ */
+public class ByteOrderMark implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /** UTF-8 BOM */
+    public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF);
+    /** UTF-16BE BOM (Big Endian) */
+    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);
+    /** UTF-16LE BOM (Little Endian) */
+    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);
+
+    /** Name of the charset this BOM stands for; never null or empty. */
+    private final String charsetName;
+    /** The BOM's byte values, held as ints; never null or empty. */
+    private final int[] bytes;
+
+    /**
+     * Construct a new BOM.
+     *
+     * @param charsetName The name of the charset the BOM represents
+     * @param bytes The BOM's bytes
+     * @throws IllegalArgumentException if the charsetName is null or
+     * zero length
+     * @throws IllegalArgumentException if the bytes are null or zero
+     * length
+     */
+    public ByteOrderMark(String charsetName, int... bytes) {
+        if (charsetName == null || charsetName.length() == 0) {
+            throw new IllegalArgumentException("No charsetName specified");
+        }
+        if (bytes == null || bytes.length == 0) {
+            throw new IllegalArgumentException("No bytes specified");
+        }
+        this.charsetName = charsetName;
+        // Defensive copy: later changes to the caller's array must not alter this BOM.
+        this.bytes = bytes.clone();
+    }
+
+    /**
+     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
+     *
+     * @return the character set name
+     */
+    public String getCharsetName() {
+        return charsetName;
+    }
+
+    /**
+     * Return the length of the BOM's bytes.
+     *
+     * @return the length of the BOM's bytes
+     */
+    public int length() {
+        return bytes.length;
+    }
+
+    /**
+     * The byte at the specified position.
+     *
+     * @param pos The position, from 0 to {@link #length()} - 1
+     * @return The specified byte, exactly as passed to the constructor
+     * @throws ArrayIndexOutOfBoundsException if the position is out of range
+     */
+    public int get(int pos) {
+        return bytes[pos];
+    }
+
+    /**
+     * Return a copy of the BOM's bytes.
+     *
+     * @return a copy of the BOM's bytes, each value narrowed to a byte
+     */
+    public byte[] getBytes() {
+        byte[] copy = new byte[bytes.length];
+        for (int i = 0; i < bytes.length; i++) {
+            copy[i] = (byte)bytes[i];
+        }
+        return copy;
+    }
+
+    /**
+     * Indicates if this BOM's bytes equals another.
+     * <p>
+     * Only the byte values are compared; the charset name takes no part
+     * in equality.
+     *
+     * @param obj The object to compare to
+     * @return true if the bom's bytes are equal, otherwise
+     * false
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof ByteOrderMark)) {
+            return false;
+        }
+        ByteOrderMark bom = (ByteOrderMark)obj;
+        if (bytes.length != bom.length()) {
+            return false;
+        }
+        for (int i = 0; i < bytes.length; i++) {
+            if (bytes[i] != bom.get(i)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return the hashcode for this BOM.
+     * <p>
+     * Computed from the class and the byte values only, so it stays
+     * consistent with {@link #equals(Object)}.
+     *
+     * @return the hashcode for this BOM.
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        int hashCode = getClass().hashCode();
+        for (int b : bytes) {
+            hashCode += b;
+        }
+        return hashCode;
+    }
+
+    /**
+     * Provide a String representation of the BOM, for example
+     * <code>ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]</code>.
+     *
+     * @return the simple class name followed by the charset name and the
+     * bytes in upper-case hexadecimal
+     */
+    @Override
+    public String toString() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(getClass().getSimpleName());
+        builder.append('[');
+        builder.append(charsetName);
+        builder.append(": ");
+        for (int i = 0; i < bytes.length; i++) {
+            if (i > 0) {
+                builder.append(",");
+            }
+            builder.append("0x");
+            // Mask to the low 8 bits so each value prints as a single byte.
+            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
+        }
+        builder.append(']');
+        return builder.toString();
+    }
+
+}
Propchange: commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Copied: commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java (from r1003693, commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java)
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java?p2=commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java&r1=1003693&r2=1004073&rev=1004073&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java (original)
+++ commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java Sun Oct 3 23:44:19 2010
@@ -18,33 +18,146 @@ package org.apache.commons.io.input;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.io.ByteOrderMark;
/**
- * This class is used to wrap a UTF8-encoded stream that includes an encoded
- * Byte Order Mark (BOM, 0xFEFF encoded as 0xEF 0xBB 0xBF) as its first bytes.
- * Such streams are produced by various Microsoft applications. This class
- * will automatically skip these bytes and return the subsequent byte as the
- * first byte in the stream.
- * <p>
- * If the first byte in the stream is 0xEF, this class will attempt to read
- * the next two bytes. Results are undefined if the stream does not contain
- * UTF-8 encoded data, as these next two bytes may not exist.
+ * This class is used to wrap a stream that includes an encoded
+ * {@link ByteOrderMark} as its first bytes.
+ *
+ * This class detects these bytes and, if required, can automatically skip these bytes
+ * and return the subsequent byte as the first byte in the stream.
+ *
+ * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
+ * <pre>
+ * BOMInputStream bomIn = new BOMInputStream(in);
+ * if (bomIn.hasBOM()) {
+ * // has a UTF-8 BOM
+ * }
+ * </pre>
+ *
+ * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
+ * <pre>
+ * boolean include = true;
+ * BOMInputStream bomIn = new BOMInputStream(in, include);
+ * if (bomIn.hasBOM()) {
+ * // has a UTF-8 BOM
+ * }
+ * </pre>
*
+ * <h3>Example 3 - Multiple BOMs</h3>
+ * <pre>
+ * BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
+ * if (bomIn.hasBOM() == false) {
+ * // No BOM found
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
+ * // has a UTF-16LE BOM
+ * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
+ * // has a UTF-16BE BOM
+ * }
+ * </pre>
+ *
+ * @see org.apache.commons.io.ByteOrderMark
+ * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
* @version $Revision$ $Date$
* @since Commons IO 2.0
*/
-public class BOMExclusionInputStream extends ProxyInputStream {
+public class BOMInputStream extends ProxyInputStream {
+ private final boolean include;
+ private final List<ByteOrderMark> boms;
+ private ByteOrderMark byteOrderMark;
private int[] firstBytes;
private int fbLength;
private int fbIndex;
+ private int markFbIndex;
private boolean markedAtStart;
/**
- * Constructs a new BOM Exclusion InputStream.
+ * Constructs a new BOM InputStream that excludes
+ * a {@link ByteOrderMark#UTF_8} BOM.
* @param delegate the InputStream to delegate to
*/
- public BOMExclusionInputStream(InputStream delegate) {
+ public BOMInputStream(InputStream delegate) {
+ this(delegate, false, ByteOrderMark.UTF_8);
+ }
+
+ /**
+ * Constructs a new BOM InputStream that detects a
+ * {@link ByteOrderMark#UTF_8} BOM and optionally excludes it.
+ * @param delegate the InputStream to delegate to
+ * @param include true to include the UTF-8 BOM or
+ * false to exclude it
+ */
+ public BOMInputStream(InputStream delegate, boolean include) {
+ // Delegates to the general constructor with UTF-8 as the only BOM to detect.
+ this(delegate, include, ByteOrderMark.UTF_8);
+ }
+
+ /**
+ * Constructs a new BOM InputStream that excludes
+ * the specified BOMs.
+ * @param delegate the InputStream to delegate to
+ * @param boms The BOMs to detect and exclude
+ * @throws IllegalArgumentException if boms is null or empty
+ */
+ public BOMInputStream(InputStream delegate, ByteOrderMark... boms) {
+ // include = false: a detected BOM is excluded from the bytes returned.
+ this(delegate, false, boms);
+ }
+
+ /**
+ * Constructs a new BOM InputStream that detects the
+ * specified BOMs and optionally excludes them.
+ * @param delegate the InputStream to delegate to
+ * @param include true to include the specified BOMs or
+ * false to exclude them
+ * @param boms The BOMs to detect and optionally exclude
+ */
+ public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
super(delegate);
+ if (boms == null || boms.length == 0) {
+ throw new IllegalArgumentException("No BOMs specified");
+ }
+ this.include = include;
+ this.boms = Arrays.asList(boms);
+ }
+
+ /**
+ * Indicates whether the stream contains one of the specified BOMs.
+ *
+ * @return true if the stream has one of the specified BOMs, otherwise false
+ * if it does not
+ * @throws IOException if an error reading the first bytes of the stream occurs
+ */
+ public boolean hasBOM() throws IOException {
+ return (getBOM() != null);
+ }
+
+ /**
+ * Indicates whether the stream contains the specified BOM.
+ *
+ * @param bom The BOM to check for
+ * @return true if the stream has the specified BOM, otherwise false
+ * if it does not
+ * @throws IllegalArgumentException if the BOM is not one the stream
+ * is configured to detect
+ * @throws IOException if an error reading the first bytes of the stream occurs
+ */
+ public boolean hasBOM(ByteOrderMark bom) throws IOException {
+ if (!boms.contains(bom)) {
+ throw new IllegalArgumentException("Stream not configure to detect " + bom);
+ }
+ return (byteOrderMark != null && getBOM().equals(bom));
+ }
+
+ /**
+ * Return the BOM (Byte Order Mark).
+ *
+ * @return The BOM or null if none
+ * @throws IOException if an error reading the first bytes of the stream occurs
+ */
+ public ByteOrderMark getBOM() throws IOException {
+ readFirstBytes();
+ return byteOrderMark;
}
/**
@@ -57,33 +170,70 @@ public class BOMExclusionInputStream ext
*/
private int readFirstBytes() throws IOException {
if (firstBytes == null) {
- firstBytes = new int[3];
- int b0 = in.read();
- if ((b0 < 0) || (b0 != 0xEF)) {
- return b0;
+ int max = 0;
+ for (ByteOrderMark bom : boms) {
+ max = Math.max(max, bom.length());
}
-
- int b1 = in.read();
- int b2 = in.read();
- if ((b1 == 0xBB) && (b2 == 0xBF)) {
- return in.read();
+ firstBytes = new int[max];
+ for (int i = 0; i < firstBytes.length; i++) {
+ firstBytes[i] = in.read();
+ fbLength++;
+ if (firstBytes[i] < 0) {
+ break;
+ }
+
+ byteOrderMark = find();
+ if (byteOrderMark != null) {
+ if (!include) {
+ fbLength = 0;
+ }
+ break;
+ }
}
-
- // if the stream isn't valid UTF-8, this is where things get weird
- firstBytes[fbLength++] = b0;
- firstBytes[fbLength++] = b1;
- firstBytes[fbLength++] = b2;
}
return (fbIndex < fbLength) ? firstBytes[fbIndex++] : -1;
}
+ /**
+ * Looks through the configured BOMs, in the order they were supplied,
+ * for the first one that matches the bytes read so far.
+ *
+ * @return The matched BOM or null if none matched
+ */
+ private ByteOrderMark find() {
+ ByteOrderMark matched = null;
+ for (ByteOrderMark candidate : boms) {
+ if (matches(candidate)) {
+ matched = candidate;
+ break;
+ }
+ }
+ return matched;
+ }
+
+ /**
+ * Check if the bytes match a BOM.
+ *
+ * @param bom The BOM
+ * @return true if the bytes match the bom, otherwise false
+ */
+ private boolean matches(ByteOrderMark bom) {
+ if (bom.length() != fbLength) {
+ return false;
+ }
+ for (int i = 0; i < bom.length(); i++) {
+ if (bom.get(i) != firstBytes[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
//----------------------------------------------------------------------------
// Implementation of InputStream
//----------------------------------------------------------------------------
/**
- * Invokes the delegate's <code>read()</code> method, skipping BOM.
+ * Invokes the delegate's <code>read()</code> method, detecting and
+ * optionally skipping BOM.
* @return the byte read (excluding BOM) or -1 if the end of stream
* @throws IOException if an I/O error occurs
*/
@@ -94,7 +244,8 @@ public class BOMExclusionInputStream ext
}
/**
- * Invokes the delegate's <code>read(byte[], int, int)</code> method, skipping BOM.
+ * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting
+ * and optionally skipping BOM.
* @param buf the buffer to read the bytes into
* @param off The start offset
* @param len The number of bytes to read (excluding BOM)
@@ -118,7 +269,8 @@ public class BOMExclusionInputStream ext
}
/**
- * Invokes the delegate's <code>read(byte[])</code> method, skipping BOM.
+ * Invokes the delegate's <code>read(byte[])</code> method, detecting and
+ * optionally skipping BOM.
* @param buf the buffer to read the bytes into
* @return the number of bytes read (excluding BOM)
* or -1 if the end of stream
@@ -135,6 +287,7 @@ public class BOMExclusionInputStream ext
*/
@Override
public synchronized void mark(int readlimit) {
+ markFbIndex = fbIndex;
markedAtStart = (firstBytes == null);
in.mark(readlimit);
}
@@ -145,6 +298,7 @@ public class BOMExclusionInputStream ext
*/
@Override
public synchronized void reset() throws IOException {
+ fbIndex = markFbIndex;
if (markedAtStart) {
firstBytes = null;
}
@@ -153,7 +307,8 @@ public class BOMExclusionInputStream ext
}
/**
- * Invokes the delegate's <code>skip(long)</code> method, skipping BOM.
+ * Invokes the delegate's <code>skip(long)</code> method, detecting
+ * and optionally skipping BOM.
* @param n the number of bytes to skip
* @return the number of bytes to skipped or -1 if the end of stream
* @throws IOException if an I/O error occurs
Added: commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java?rev=1004073&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java (added)
+++ commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java Sun Oct 3 23:44:19 2010
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io;
+
+import java.util.Arrays;
+
+import org.apache.commons.io.testtools.FileBasedTestCase;
+
+
+/**
+ * Test for {@link ByteOrderMark}.
+ * <p>
+ * Uses three artificial BOMs of one, two and three bytes so that each
+ * accessor is checked against every length.
+ *
+ * @version $Id$
+ */
+public class ByteOrderMarkTestCase extends FileBasedTestCase {
+
+    private static final ByteOrderMark TEST_BOM_1 = new ByteOrderMark("test1", 1);
+    private static final ByteOrderMark TEST_BOM_2 = new ByteOrderMark("test2", 1, 2);
+    private static final ByteOrderMark TEST_BOM_3 = new ByteOrderMark("test3", 1, 2, 3);
+
+    public ByteOrderMarkTestCase(String name) {
+        super(name);
+    }
+
+    /** Test {@link ByteOrderMark#getCharsetName()} */
+    public void testCharsetName() {
+        assertEquals("test1 name", "test1", TEST_BOM_1.getCharsetName());
+        assertEquals("test2 name", "test2", TEST_BOM_2.getCharsetName());
+        assertEquals("test3 name", "test3", TEST_BOM_3.getCharsetName());
+    }
+
+    /** Test {@link ByteOrderMark#length()} */
+    public void testLength() {
+        assertEquals("test1 length", 1, TEST_BOM_1.length());
+        assertEquals("test2 length", 2, TEST_BOM_2.length());
+        assertEquals("test3 length", 3, TEST_BOM_3.length());
+    }
+
+    /** Test {@link ByteOrderMark#get(int)} */
+    public void testGet() {
+        assertEquals("test1 get(0)", 1, TEST_BOM_1.get(0));
+        assertEquals("test2 get(0)", 1, TEST_BOM_2.get(0));
+        assertEquals("test2 get(1)", 2, TEST_BOM_2.get(1));
+        assertEquals("test3 get(0)", 1, TEST_BOM_3.get(0));
+        assertEquals("test3 get(1)", 2, TEST_BOM_3.get(1));
+        assertEquals("test3 get(2)", 3, TEST_BOM_3.get(2));
+    }
+
+    /** Test {@link ByteOrderMark#getBytes()} */
+    public void testGetBytes() {
+        // Each message names the BOM under test so a failure points at the right one.
+        assertTrue("test1 bytes", Arrays.equals(TEST_BOM_1.getBytes(), new byte[] {(byte)1}));
+        assertTrue("test2 bytes", Arrays.equals(TEST_BOM_2.getBytes(), new byte[] {(byte)1, (byte)2}));
+        assertTrue("test3 bytes", Arrays.equals(TEST_BOM_3.getBytes(), new byte[] {(byte)1, (byte)2, (byte)3}));
+    }
+
+    /** Test {@link ByteOrderMark#equals(Object)} */
+    public void testEquals() {
+        assertTrue("test1 equals", TEST_BOM_1.equals(TEST_BOM_1));
+        assertTrue("test2 equals", TEST_BOM_2.equals(TEST_BOM_2));
+        assertTrue("test3 equals", TEST_BOM_3.equals(TEST_BOM_3));
+
+        assertFalse("Object not equal", TEST_BOM_1.equals(new Object()));
+        assertFalse("test1-1 not equal", TEST_BOM_1.equals(new ByteOrderMark("1a", 2)));
+        assertFalse("test1-2 not test2", TEST_BOM_1.equals(new ByteOrderMark("1b", 1, 2)));
+        assertFalse("test2 not equal", TEST_BOM_2.equals(new ByteOrderMark("2", 1, 1)));
+        assertFalse("test3 not equal", TEST_BOM_3.equals(new ByteOrderMark("3", 1, 2, 4)));
+    }
+
+    /** Test {@link ByteOrderMark#hashCode()} */
+    public void testHashCode() {
+        // The expected value is the class hash plus the sum of the BOM's byte values.
+        int bomClassHash = ByteOrderMark.class.hashCode();
+        assertEquals("hash test1 ", bomClassHash + 1, TEST_BOM_1.hashCode());
+        assertEquals("hash test2 ", bomClassHash + 3, TEST_BOM_2.hashCode());
+        assertEquals("hash test3 ", bomClassHash + 6, TEST_BOM_3.hashCode());
+    }
+
+    /** Test Errors */
+    public void testErrors() {
+        try {
+            new ByteOrderMark(null, 1,2,3);
+            fail("null charset name, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+        try {
+            new ByteOrderMark("", 1,2,3);
+            fail("no charset name, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+        try {
+            new ByteOrderMark("a", (int[])null);
+            fail("null bytes, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+        try {
+            new ByteOrderMark("b", new int[0]);
+            fail("empty bytes, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+    }
+
+    /** Test {@link ByteOrderMark#toString()} */
+    public void testToString() {
+        assertEquals("test1 ", "ByteOrderMark[test1: 0x1]", TEST_BOM_1.toString());
+        assertEquals("test2 ", "ByteOrderMark[test2: 0x1,0x2]", TEST_BOM_2.toString());
+        assertEquals("test3 ", "ByteOrderMark[test3: 0x1,0x2,0x3]", TEST_BOM_3.toString());
+    }
+}
Propchange: commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Copied: commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java (from r1003693, commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java)
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java?p2=commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java&p1=commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java&r1=1003693&r2=1004073&rev=1004073&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java (original)
+++ commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java Sun Oct 3 23:44:19 2010
@@ -20,9 +20,16 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import org.apache.commons.io.ByteOrderMark;
+
import junit.framework.TestCase;
-public class BOMExclusionInputStreamTest extends TestCase {
+/**
+ * Test case for {@link BOMInputStream}.
+ *
+ * @version $Id$
+ */
+public class BOMInputStreamTest extends TestCase {
//----------------------------------------------------------------------------
// Support code
//----------------------------------------------------------------------------
@@ -94,39 +101,111 @@ public class BOMExclusionInputStreamTest
public void testReadWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ BOMInputStream in = new BOMInputStream(createDataStream(data, false));
assertEquals('A', in.read());
assertEquals('B', in.read());
assertEquals('C', in.read());
assertEquals(-1, in.read());
+ assertFalse("hasBOM()", in.hasBOM());
+ assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+ assertNull("getBOM", in.getBOM());
+ }
+
+ /** An empty stream reports end of stream on the first read and has no BOM. */
+ public void testReadEmpty() throws Exception {
+ BOMInputStream bomIn = new BOMInputStream(createDataStream(new byte[0], false));
+ assertEquals(-1, bomIn.read());
+ assertFalse("hasBOM()", bomIn.hasBOM());
+ assertFalse("hasBOM(UTF-8)", bomIn.hasBOM(ByteOrderMark.UTF_8));
+ assertNull("getBOM", bomIn.getBOM());
+ }
+
+ public void testReadSmall() throws Exception {
+ byte[] data = new byte[] { 'A', 'B' };
+ BOMInputStream in = new BOMInputStream(createDataStream(data, false));
+ assertEquals('A', in.read());
+ assertEquals('B', in.read());
+ assertEquals(-1, in.read());
+ assertFalse("hasBOM()", in.hasBOM());
+ assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+ assertNull("getBOM", in.getBOM());
}
public void testReadWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ BOMInputStream in = new BOMInputStream(createDataStream(data, true));
assertEquals('A', in.read());
assertEquals('B', in.read());
assertEquals('C', in.read());
assertEquals(-1, in.read());
+ assertTrue("hasBOM()", in.hasBOM());
+ assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+ assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
+ try {
+ in.hasBOM(ByteOrderMark.UTF_16BE);
+ // Without this the test passes even when no exception is thrown.
+ fail("UTF-16BE not configured, expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected - not configured for UTF-16BE
+ }
+ }
+
+ /** Querying the BOM before reading must not consume or disturb the data bytes. */
+ public void testGetBOMFirstThenRead() throws Exception {
+ byte[] payload = new byte[] { 'A', 'B', 'C' };
+ BOMInputStream bomIn = new BOMInputStream(createDataStream(payload, true));
+ assertEquals("getBOM", ByteOrderMark.UTF_8, bomIn.getBOM());
+ assertTrue("hasBOM()", bomIn.hasBOM());
+ assertTrue("hasBOM(UTF-8)", bomIn.hasBOM(ByteOrderMark.UTF_8));
+ // The BOM is excluded, so the first byte read is the first data byte.
+ assertEquals('A', bomIn.read());
+ assertEquals('B', bomIn.read());
+ assertEquals('C', bomIn.read());
+ assertEquals(-1, bomIn.read());
+ }
+
+ /** With include = true the BOM bytes are returned ahead of the data. */
+ public void testReadWithBOMInclude() throws Exception {
+ byte[] payload = new byte[] { 'A', 'B', 'C' };
+ BOMInputStream bomIn = new BOMInputStream(createDataStream(payload, true), true);
+ // UTF-8 BOM bytes, then the data, then end of stream.
+ int[] expected = new int[] { 0xEF, 0xBB, 0xBF, 'A', 'B', 'C', -1 };
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals(expected[i], bomIn.read());
+ }
+ assertTrue("hasBOM()", bomIn.hasBOM());
+ assertTrue("hasBOM(UTF-8)", bomIn.hasBOM(ByteOrderMark.UTF_8));
+ assertEquals("getBOM", ByteOrderMark.UTF_8, bomIn.getBOM());
+ }
+
+ public void testReadWithMultipleBOM() throws Exception {
+ byte[] data = new byte[] { 'A', 'B', 'C' };
+ BOMInputStream in = new BOMInputStream(createDataStream(data, true),
+ ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8);
+ assertEquals('A', in.read());
+ assertEquals('B', in.read());
+ assertEquals('C', in.read());
+ assertEquals(-1, in.read());
+ assertTrue("hasBOM()", in.hasBOM());
+ assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+ assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
+ assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
}
public void testLargeBufferWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
byte[] buf = new byte[1024];
assertData(data, buf, 0, in.read(buf));
}
public void testLargeBufferWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ InputStream in = new BOMInputStream(createDataStream(data, true));
byte[] buf = new byte[1024];
assertData(data, buf, 0, in.read(buf));
}
public void testSmallBufferWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
byte[] buf = new byte[1024];
assertData(new byte[] { 'A', 'B' }, buf, 0, in.read(buf, 0, 2));
assertData(new byte[] { 'C' }, buf, 0, in.read(buf, 0, 2));
@@ -134,7 +213,7 @@ public class BOMExclusionInputStreamTest
public void testSmallBufferWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ InputStream in = new BOMInputStream(createDataStream(data, true));
byte[] buf = new byte[1024];
assertData(new byte[] { 'A', 'B' }, buf, 0, in.read(buf, 0, 2));
assertData(new byte[] { 'C' }, buf, 0, in.read(buf, 0, 2));
@@ -142,7 +221,7 @@ public class BOMExclusionInputStreamTest
public void testLeadingNonBOMSingleRead() throws Exception {
byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
assertEquals(0xEF, in.read());
assertEquals(0xAB, in.read());
assertEquals(0xCD, in.read());
@@ -151,28 +230,28 @@ public class BOMExclusionInputStreamTest
public void testLeadingNonBOMBufferedRead() throws Exception {
byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
byte[] buf = new byte[1024];
assertData(data, buf, 0, in.read(buf));
}
public void testSkipWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
in.skip(2L);
assertEquals('C', in.read());
}
public void testSkipWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ InputStream in = new BOMInputStream(createDataStream(data, true));
in.skip(2L);
assertEquals('C', in.read());
}
public void testMarkResetAfterReadWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
assertTrue(in.markSupported());
in.read();
@@ -186,7 +265,7 @@ public class BOMExclusionInputStreamTest
public void testMarkResetAfterReadWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ InputStream in = new BOMInputStream(createDataStream(data, true));
assertTrue(in.markSupported());
in.read();
@@ -200,7 +279,7 @@ public class BOMExclusionInputStreamTest
public void testMarkResetBeforeReadWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
assertTrue(in.markSupported());
in.mark(10);
@@ -213,7 +292,7 @@ public class BOMExclusionInputStreamTest
public void testMarkResetBeforeReadWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ InputStream in = new BOMInputStream(createDataStream(data, true));
assertTrue(in.markSupported());
in.mark(10);
@@ -226,20 +305,36 @@ public class BOMExclusionInputStreamTest
public void testAvailableWithoutBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+ InputStream in = new BOMInputStream(createDataStream(data, false));
assertEquals(4, in.available());
}
public void testAvailableWithBOM() throws Exception {
byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
- InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+ InputStream in = new BOMInputStream(createDataStream(data, true));
assertEquals(7, in.available());
}
+ /** The constructor rejects both a null and an empty BOM array. */
+ public void testNoBoms() throws Exception {
+ byte[] data = new byte[] { 'A', 'B', 'C' };
+ try {
+ new BOMInputStream(createDataStream(data, true), false, (ByteOrderMark[])null);
+ fail("Null BOMs, expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ try {
+ new BOMInputStream(createDataStream(data, true), false, new ByteOrderMark[0]);
+ // This case passes an empty array, not null, so the message says so.
+ fail("Empty BOMs, expected IllegalArgumentException");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ }
+
// this is here for coverage
public void testClose() throws Exception {
ExpectCloseInputStream del = new ExpectCloseInputStream();
- InputStream in = new BOMExclusionInputStream(del);
+ InputStream in = new BOMInputStream(del);
in.close();
del.assertCloseCalled();