You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ni...@apache.org on 2010/10/04 01:44:20 UTC

svn commit: r1004073 - in /commons/proper/io/trunk/src: java/org/apache/commons/io/ java/org/apache/commons/io/input/ test/org/apache/commons/io/ test/org/apache/commons/io/input/

Author: niallp
Date: Sun Oct  3 23:44:19 2010
New Revision: 1004073

URL: http://svn.apache.org/viewvc?rev=1004073&view=rev
Log:
IO-178 Extend BOMInputStream from excluding UTF-8 BOMs to detecting and optionally excluding any BOM
  - Rename BOMExclusionInputStream to BOMInputStream
  - Add new ByteOrderMark implementation
  - Enhance BOMInputStream from only processing UTF-8 BOMs to be able to configure for any BOM
  - Provide ability to get/check the BOM detected
  - Provide facility to either include or exclude the detected BOM

Added:
    commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java   (with props)
    commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java
      - copied, changed from r1003693, commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java
    commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java   (with props)
    commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java
      - copied, changed from r1003693, commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java
Removed:
    commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java
    commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java

Added: commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java?rev=1004073&view=auto
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java (added)
+++ commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java Sun Oct  3 23:44:19 2010
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io;
+
+import java.io.Serializable;
+
+/**
+ * Byte Order Mark (BOM) representation.
+ *
+ * @see org.apache.commons.io.input.BOMInputStream
+ * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark<a>
+ * @version $Id$
+ * @Since Commons IO 2.0
+ */
+public class ByteOrderMark implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /** UTF-8 BOM */
+    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8",    0xEF, 0xBB, 0xBF);
+    /** UTF-16BE BOM (Big Endian) */
+    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);
+    /** UTF-16LE BOM (Little Endian) */
+    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);
+
+    private final String charsetName;
+    private final int[] bytes;
+
+    /**
+     * Construct a new BOM.
+     *
+     * @param charsetName The name of the charset the BOM represents
+     * @param bytes The BOM's bytes
+     * @throws IllegalArgumentException if the charsetName is null or
+     * zero length
+     * @throws IllegalArgumentException if the bytes are null or zero
+     * length
+     */
+    public ByteOrderMark(String charsetName, int... bytes) {
+        if (charsetName == null || charsetName.length() == 0) {
+            throw new IllegalArgumentException("No charsetName specified");
+        }
+        if (bytes == null || bytes.length == 0) {
+            throw new IllegalArgumentException("No bytes specified");
+        }
+        this.charsetName = charsetName;
+        this.bytes = new int[bytes.length];
+        System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+    }
+
+    /**
+     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
+     *
+     * @return the character set name
+     */
+    public String getCharsetName() {
+        return charsetName;
+    }
+
+    /**
+     * Return the length of the BOM's bytes.
+     *
+     * @return the length of the BOM's bytes
+     */
+    public int length() {
+        return bytes.length;
+    }
+
+    /**
+     * The byte at the specified position.
+     *
+     * @param pos The position
+     * @return The specified byte
+     */
+    public int get(int pos) {
+        return bytes[pos];
+    }
+
+    /**
+     * Return a copy of the BOM's bytes.
+     *
+     * @return a copy of the BOM's bytes
+     */
+    public byte[] getBytes() {
+        byte[] copy = new byte[bytes.length];
+        for (int i = 0; i < bytes.length; i++) {
+            copy[i] = (byte)bytes[i];
+        }
+        return copy;
+    }
+
+    /**
+     * Indicates if this BOM's bytes equals another.
+     *
+     * @param obj The object to compare to
+     * @return true if the bom's bytes are equal, otherwise
+     * false
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof ByteOrderMark)) {
+            return false;
+        }
+        ByteOrderMark bom = (ByteOrderMark)obj;
+        if (bytes.length != bom.length()) {
+            return false;
+        }
+        for (int i = 0; i < bytes.length; i++) {
+            if (bytes[i] != bom.get(i)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return the hashcode for this BOM.
+     *
+     * @return the hashcode for this BOM.
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        int hashCode = getClass().hashCode();
+        for (int b : bytes) {
+            hashCode += b;
+        }
+        return hashCode;
+    }
+
+    /**
+     * Provide a String representation of the BOM.
+     *
+     * @return the length of the BOM's bytes
+     */
+    @Override
+    public String toString() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(getClass().getSimpleName());
+        builder.append('[');
+        builder.append(charsetName);
+        builder.append(": ");
+        for (int i = 0; i < bytes.length; i++) {
+            if (i > 0) {
+                builder.append(",");
+            }
+            builder.append("0x");
+            Integer.toBinaryString(0);
+            Integer.toOctalString(0);
+            Integer.toHexString(0);
+            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
+        }
+        builder.append(']');
+        return builder.toString();
+    }
+
+}

Propchange: commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/io/trunk/src/java/org/apache/commons/io/ByteOrderMark.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Copied: commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java (from r1003693, commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java)
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java?p2=commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java&r1=1003693&r2=1004073&rev=1004073&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMExclusionInputStream.java (original)
+++ commons/proper/io/trunk/src/java/org/apache/commons/io/input/BOMInputStream.java Sun Oct  3 23:44:19 2010
@@ -18,33 +18,146 @@ package org.apache.commons.io.input;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.io.ByteOrderMark;
 
 /**
- *  This class is used to wrap a UTF8-encoded stream that includes an encoded
- *  Byte Order Mark (BOM, 0xFEFF encoded as 0xEF 0xBB 0xBF) as its first bytes.
- *  Such streams are produced by various Microsoft applications. This class
- *  will automatically skip these bytes and return the subsequent byte as the
- *  first byte in the stream.
- *  <p>
- *  If the first byte in the stream is 0xEF, this class will attempt to read
- *  the next two bytes. Results are undefined if the stream does not contain
- *  UTF-8 encoded data, as these next two bytes may not exist.
+ * This class is used to wrap a stream that includes an encoded
+ * {@link ByteOrderMark} as its first bytes.
+ *
+ * This class detects these bytes and, if required, can automatically skip these bytes
+ *  and return the subsequent byte as the first byte in the stream.
+ *
+ * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
+ * <pre>
+ *      BOMInputStream bomIn = new BOMInputStream(in);
+ *      if (bomIn.hasBOM()) {
+ *          // has a UTF-8 BOM
+ *      }
+ * </pre>
+ *
+ * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
+ * <pre>
+ *      boolean include = true;
+ *      BOMInputStream bomIn = new BOMInputStream(in, include);
+ *      if (bomIn.hasBOM()) {
+ *          // has a UTF-8 BOM
+ *      }
+ * </pre>
  *
+ * <h3>Example 3 - Multiple BOMs</h3>
+ * <pre>
+ *      BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
+ *      if (bomIn.hasBOM() == false) {
+ *          // No BOM found
+ *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
+ *          // has a UTF-16LE BOM
+ *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
+ *          // has a UTF-16BE BOM
+ *      }
+ * </pre>
+ *
+ * @see org.apache.commons.io.ByteOrderMark
+ * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
  * @version $Revision$ $Date$
  * @since Commons IO 2.0
  */
-public class BOMExclusionInputStream extends ProxyInputStream {
+public class BOMInputStream extends ProxyInputStream {
+    private final boolean include;
+    private final List<ByteOrderMark> boms;
+    private ByteOrderMark byteOrderMark;
     private int[] firstBytes;
     private int fbLength;
     private int fbIndex;
+    private int markFbIndex;
     private boolean markedAtStart;
 
     /**
-     * Constructs a new BOM Exclusion InputStream.
+     * Constructs a new BOM InputStream that excludes
+     * a {@link ByteOrderMark#UTF_8} BOM.
      * @param delegate the InputStream to delegate to
      */
-    public BOMExclusionInputStream(InputStream delegate) {
+    public BOMInputStream(InputStream delegate) {
+        this(delegate, false, ByteOrderMark.UTF_8);
+    }
+
+    /**
+     * Constructs a new BOM InputStream that detects
+     * a {@link ByteOrderMark#UTF_8} and optionally excludes it.
+     * @param delegate the InputStream to delegate to
+     * @param include true to include the UTF-8 BOM or
+     * false to exclude it
+     */
+    public BOMInputStream(InputStream delegate, boolean include) {
+        this(delegate, include, ByteOrderMark.UTF_8);
+    }
+
+    /**
+     * Constructs a new BOM InputStream that excludes
+     * the specified BOMs.
+     * @param delegate the InputStream to delegate to
+     * @param boms The BOMs to detect and exclude
+     */
+    public BOMInputStream(InputStream delegate, ByteOrderMark... boms) {
+        this(delegate, false, boms);
+    }
+
+    /**
+     * Constructs a new BOM InputStream that detects the
+     * specified BOMs and optionally excludes them.
+     * @param delegate the InputStream to delegate to
+     * @param include true to include the specified BOMs or
+     * false to exclude them
+     * @param boms The BOMs to detect and optionally exclude
+     */
+    public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
         super(delegate);
+        if (boms == null || boms.length == 0) {
+            throw new IllegalArgumentException("No BOMs specified");
+        }
+        this.include = include;
+        this.boms = Arrays.asList(boms);
+    }
+
+    /**
+     * Indicates whether the stream contains one of the specified BOMs.
+     *
+     * @return true if the stream has one of the specified BOMs, otherwise false
+     * if it does not
+     * @throws IOException if an error reading the first bytes of the stream occurs
+     */
+    public boolean hasBOM() throws IOException {
+        return (getBOM() != null);
+    }
+
+    /**
+     * Indicates whether the stream contains the specified BOM.
+     *
+     * @param bom The BOM to check for
+     * @return true if the stream has the specified BOM, otherwise false
+     * if it does not
+     * @throws IllegalArgumentException if the BOM is not one the stream
+     * is configured to detect
+     * @throws IOException if an error reading the first bytes of the stream occurs
+     */
+    public boolean hasBOM(ByteOrderMark bom) throws IOException {
+        if (!boms.contains(bom)) {
+            throw new IllegalArgumentException("Stream not configure to detect " + bom);
+        }
+        return (byteOrderMark != null && getBOM().equals(bom));
+    }
+
+    /**
+     * Return the BOM (Byte Order Mark).
+     *
+     * @return The BOM or null if none
+     * @throws IOException if an error reading the first bytes of the stream occurs
+     */
+    public ByteOrderMark getBOM() throws IOException {
+        readFirstBytes();
+        return byteOrderMark;
     }
 
     /**
@@ -57,33 +170,70 @@ public class BOMExclusionInputStream ext
      */
     private int readFirstBytes() throws IOException {
         if (firstBytes == null) {
-            firstBytes = new int[3];
-            int b0 = in.read();
-            if ((b0 < 0) || (b0 != 0xEF)) {
-                return b0;
+            int max = 0;
+            for (ByteOrderMark bom : boms) {
+                max = Math.max(max, bom.length());
             }
-
-            int b1 = in.read();
-            int b2 = in.read();
-            if ((b1 == 0xBB) && (b2 == 0xBF)) {
-                return in.read();
+            firstBytes = new int[max];
+            for (int i = 0; i < firstBytes.length; i++) {
+                firstBytes[i] = in.read();
+                fbLength++;
+                if (firstBytes[i] < 0) {
+                    break;
+                }
+
+                byteOrderMark = find();
+                if (byteOrderMark != null) {
+                    if (!include) {
+                        fbLength = 0;
+                    }
+                    break;
+                }
             }
-
-            // if the stream isn't valid UTF-8, this is where things get weird
-            firstBytes[fbLength++] = b0;
-            firstBytes[fbLength++] = b1;
-            firstBytes[fbLength++] = b2;
         }
 
         return (fbIndex < fbLength) ? firstBytes[fbIndex++] : -1;
     }
 
+    /**
+     * Find a BOM with the specified bytes.
+     *
+     * @return The matched BOM or null if none matched
+     */
+    private ByteOrderMark find() {
+        for (ByteOrderMark bom : boms) {
+            if (matches(bom)) {
+                return bom;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Check if the bytes match a BOM.
+     *
+     * @param bom The BOM
+     * @return true if the bytes match the bom, otherwise false
+     */
+    private boolean matches(ByteOrderMark bom) {
+        if (bom.length() != fbLength) {
+            return false;
+        }
+        for (int i = 0; i < bom.length(); i++) {
+            if (bom.get(i) != firstBytes[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
     //----------------------------------------------------------------------------
     //  Implementation of InputStream
     //----------------------------------------------------------------------------
 
     /**
-     * Invokes the delegate's <code>read()</code> method, skipping BOM.
+     * Invokes the delegate's <code>read()</code> method, detecting and
+     * optionally skipping BOM.
      * @return the byte read (excluding BOM) or -1 if the end of stream
      * @throws IOException if an I/O error occurs
      */
@@ -94,7 +244,8 @@ public class BOMExclusionInputStream ext
     }
 
     /**
-     * Invokes the delegate's <code>read(byte[], int, int)</code> method, skipping BOM.
+     * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting
+     * and optionally skipping BOM.
      * @param buf the buffer to read the bytes into
      * @param off The start offset
      * @param len The number of bytes to read (excluding BOM)
@@ -118,7 +269,8 @@ public class BOMExclusionInputStream ext
     }
 
     /**
-     * Invokes the delegate's <code>read(byte[])</code> method, skipping BOM.
+     * Invokes the delegate's <code>read(byte[])</code> method, detecting and
+     * optionally skipping BOM.
      * @param buf the buffer to read the bytes into
      * @return the number of bytes read (excluding BOM)
      * or -1 if the end of stream
@@ -135,6 +287,7 @@ public class BOMExclusionInputStream ext
      */
     @Override
     public synchronized void mark(int readlimit) {
+        markFbIndex = fbIndex;
         markedAtStart = (firstBytes == null);
         in.mark(readlimit);
     }
@@ -145,6 +298,7 @@ public class BOMExclusionInputStream ext
      */
     @Override
     public synchronized void reset() throws IOException {
+        fbIndex = markFbIndex;
         if (markedAtStart) {
             firstBytes = null;
         }
@@ -153,7 +307,8 @@ public class BOMExclusionInputStream ext
     }
 
     /**
-     * Invokes the delegate's <code>skip(long)</code> method, skipping BOM.
+     * Invokes the delegate's <code>skip(long)</code> method, detecting
+     * and optionally skipping BOM.
      * @param n the number of bytes to skip
      * @return the number of bytes to skipped or -1 if the end of stream
      * @throws IOException if an I/O error occurs

Added: commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java?rev=1004073&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java (added)
+++ commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java Sun Oct  3 23:44:19 2010
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io;
+
+import java.util.Arrays;
+
+import org.apache.commons.io.testtools.FileBasedTestCase;
+
+
+/**
+ * Test for {@link ByteOrderMark}.
+ *
+ * @version $Id$
+ */
+public class ByteOrderMarkTestCase extends FileBasedTestCase {
+
+    private static final ByteOrderMark TEST_BOM_1 = new ByteOrderMark("test1", 1);
+    private static final ByteOrderMark TEST_BOM_2 = new ByteOrderMark("test2", 1, 2);
+    private static final ByteOrderMark TEST_BOM_3 = new ByteOrderMark("test3", 1, 2, 3);
+
+    public ByteOrderMarkTestCase(String name) {
+        super(name);
+    }
+
+    /** Test {@link ByteOrderMark#getCharsetName()} */
+    public void testCharsetName() {
+        assertEquals("test1 name", "test1", TEST_BOM_1.getCharsetName());
+        assertEquals("test2 name", "test2", TEST_BOM_2.getCharsetName());
+        assertEquals("test3 name", "test3", TEST_BOM_3.getCharsetName());
+    }
+
+    /** Test {@link ByteOrderMark#length()} */
+    public void testLength() {
+        assertEquals("test1 length", 1, TEST_BOM_1.length());
+        assertEquals("test2 length", 2, TEST_BOM_2.length());
+        assertEquals("test3 length", 3, TEST_BOM_3.length());
+    }
+
+    /** Test {@link ByteOrderMark#get(int)} */
+    public void testGet() {
+        assertEquals("test1 get(0)", 1, TEST_BOM_1.get(0));
+        assertEquals("test2 get(0)", 1, TEST_BOM_2.get(0));
+        assertEquals("test2 get(1)", 2, TEST_BOM_2.get(1));
+        assertEquals("test3 get(0)", 1, TEST_BOM_3.get(0));
+        assertEquals("test3 get(1)", 2, TEST_BOM_3.get(1));
+        assertEquals("test3 get(2)", 3, TEST_BOM_3.get(2));
+    }
+
+    /** Test {@link ByteOrderMark#getBytes()} */
+    public void testGetBytes() {
+        assertTrue("test1 bytes", Arrays.equals(TEST_BOM_1.getBytes(), new byte[] {(byte)1}));
+        assertTrue("test2 bytes", Arrays.equals(TEST_BOM_2.getBytes(), new byte[] {(byte)1, (byte)2}));
+        assertTrue("test3 bytes", Arrays.equals(TEST_BOM_3.getBytes(), new byte[] {(byte)1, (byte)2, (byte)3}));
+    }
+
+    /** Test {@link ByteOrderMark#equals(Object)} */
+    public void testEquals() {
+        assertTrue("test1 equals", TEST_BOM_1.equals(TEST_BOM_1));
+        assertTrue("test2 equals", TEST_BOM_2.equals(TEST_BOM_2));
+        assertTrue("test3 equals", TEST_BOM_3.equals(TEST_BOM_3));
+
+        assertFalse("Object not equal",  TEST_BOM_1.equals(new Object()));
+        assertFalse("test1-1 not equal", TEST_BOM_1.equals(new ByteOrderMark("1a", 2)));
+        assertFalse("test1-2 not test2", TEST_BOM_1.equals(new ByteOrderMark("1b", 1, 2)));
+        assertFalse("test2 not equal", TEST_BOM_2.equals(new ByteOrderMark("2", 1, 1)));
+        assertFalse("test3 not equal", TEST_BOM_3.equals(new ByteOrderMark("3", 1, 2, 4)));
+    }
+
+    /** Test {@link ByteOrderMark#hashCode()} */
+    public void testHashCode() {
+        int bomClassHash = ByteOrderMark.class.hashCode();
+        assertEquals("hash test1 ", bomClassHash + 1,  TEST_BOM_1.hashCode());
+        assertEquals("hash test2 ", bomClassHash + 3,  TEST_BOM_2.hashCode());
+        assertEquals("hash test3 ", bomClassHash + 6,  TEST_BOM_3.hashCode());
+    }
+
+    /** Test Errors */
+    public void testErrors() {
+        try {
+            new ByteOrderMark(null, 1,2,3);
+            fail("null charset name, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+        try {
+            new ByteOrderMark("", 1,2,3);
+            fail("no charset name, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+        try {
+            new ByteOrderMark("a", (int[])null);
+            fail("null bytes, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+        try {
+            new ByteOrderMark("b", new int[0]);
+            fail("empty bytes, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected
+        }
+    }
+
+    /** Test {@link ByteOrderMark#toString()} */
+    public void testToString() {
+        assertEquals("test1 ", "ByteOrderMark[test1: 0x1]",          TEST_BOM_1.toString());
+        assertEquals("test2 ", "ByteOrderMark[test2: 0x1,0x2]",      TEST_BOM_2.toString());
+        assertEquals("test3 ", "ByteOrderMark[test3: 0x1,0x2,0x3]",  TEST_BOM_3.toString());
+    }
+}

Propchange: commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/io/trunk/src/test/org/apache/commons/io/ByteOrderMarkTestCase.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Copied: commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java (from r1003693, commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java)
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java?p2=commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java&p1=commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java&r1=1003693&r2=1004073&rev=1004073&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMExclusionInputStreamTest.java (original)
+++ commons/proper/io/trunk/src/test/org/apache/commons/io/input/BOMInputStreamTest.java Sun Oct  3 23:44:19 2010
@@ -20,9 +20,16 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.commons.io.ByteOrderMark;
+
 import junit.framework.TestCase;
 
-public class BOMExclusionInputStreamTest extends TestCase {
+/**
+ * Test case for {@link BOMInputStream}.
+ *
+ * @version $Id$
+ */
+public class BOMInputStreamTest extends TestCase {
     //----------------------------------------------------------------------------
     //  Support code
     //----------------------------------------------------------------------------
@@ -94,39 +101,111 @@ public class BOMExclusionInputStreamTest
 
     public void testReadWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        BOMInputStream in = new BOMInputStream(createDataStream(data, false));
         assertEquals('A', in.read());
         assertEquals('B', in.read());
         assertEquals('C', in.read());
         assertEquals(-1, in.read());
+        assertFalse("hasBOM()", in.hasBOM());
+        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertNull("getBOM", in.getBOM());
+    }
+
+    public void testReadEmpty() throws Exception {
+        byte[] data = new byte[] {};
+        BOMInputStream in = new BOMInputStream(createDataStream(data, false));
+        assertEquals(-1, in.read());
+        assertFalse("hasBOM()", in.hasBOM());
+        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertNull("getBOM", in.getBOM());
+    }
+
+    public void testReadSmall() throws Exception {
+        byte[] data = new byte[] { 'A', 'B' };
+        BOMInputStream in = new BOMInputStream(createDataStream(data, false));
+        assertEquals('A', in.read());
+        assertEquals('B', in.read());
+        assertEquals(-1, in.read());
+        assertFalse("hasBOM()", in.hasBOM());
+        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertNull("getBOM", in.getBOM());
     }
 
     public void testReadWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        BOMInputStream in = new BOMInputStream(createDataStream(data, true));
         assertEquals('A', in.read());
         assertEquals('B', in.read());
         assertEquals('C', in.read());
         assertEquals(-1, in.read());
+        assertTrue("hasBOM()", in.hasBOM());
+        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
+        try {
+            in.hasBOM(ByteOrderMark.UTF_16BE);
+        } catch (IllegalArgumentException e) {
+            // expected - not configured for UTF-16BE
+        }
+    }
+
+    public void testGetBOMFirstThenRead() throws Exception {
+        byte[] data = new byte[] { 'A', 'B', 'C' };
+        BOMInputStream in = new BOMInputStream(createDataStream(data, true));
+        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
+        assertTrue("hasBOM()", in.hasBOM());
+        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertEquals('A', in.read());
+        assertEquals('B', in.read());
+        assertEquals('C', in.read());
+        assertEquals(-1, in.read());
+    }
+
+    public void testReadWithBOMInclude() throws Exception {
+        byte[] data = new byte[] { 'A', 'B', 'C' };
+        BOMInputStream in = new BOMInputStream(createDataStream(data, true), true);
+        assertEquals(0xEF, in.read());
+        assertEquals(0xBB, in.read());
+        assertEquals(0xBF, in.read());
+        assertEquals('A', in.read());
+        assertEquals('B', in.read());
+        assertEquals('C', in.read());
+        assertEquals(-1, in.read());
+        assertTrue("hasBOM()", in.hasBOM());
+        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
+    }
+
+    public void testReadWithMultipleBOM() throws Exception {
+        byte[] data = new byte[] { 'A', 'B', 'C' };
+        BOMInputStream in = new BOMInputStream(createDataStream(data, true), 
+                                            ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8);
+        assertEquals('A', in.read());
+        assertEquals('B', in.read());
+        assertEquals('C', in.read());
+        assertEquals(-1, in.read());
+        assertTrue("hasBOM()", in.hasBOM());
+        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
+        assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
+        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
     }
 
     public void testLargeBufferWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         byte[] buf = new byte[1024];
         assertData(data, buf, 0, in.read(buf));
     }
 
     public void testLargeBufferWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        InputStream in = new BOMInputStream(createDataStream(data, true));
         byte[] buf = new byte[1024];
         assertData(data, buf, 0, in.read(buf));
     }
 
     public void testSmallBufferWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         byte[] buf = new byte[1024];
         assertData(new byte[] { 'A', 'B' }, buf, 0, in.read(buf, 0, 2));
         assertData(new byte[] { 'C' }, buf, 0, in.read(buf, 0, 2));
@@ -134,7 +213,7 @@ public class BOMExclusionInputStreamTest
 
     public void testSmallBufferWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        InputStream in = new BOMInputStream(createDataStream(data, true));
         byte[] buf = new byte[1024];
         assertData(new byte[] { 'A', 'B' }, buf, 0, in.read(buf, 0, 2));
         assertData(new byte[] { 'C' }, buf, 0, in.read(buf, 0, 2));
@@ -142,7 +221,7 @@ public class BOMExclusionInputStreamTest
 
     public void testLeadingNonBOMSingleRead() throws Exception {
         byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         assertEquals(0xEF, in.read());
         assertEquals(0xAB, in.read());
         assertEquals(0xCD, in.read());
@@ -151,28 +230,28 @@ public class BOMExclusionInputStreamTest
 
     public void testLeadingNonBOMBufferedRead() throws Exception {
         byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         byte[] buf = new byte[1024];
         assertData(data, buf, 0, in.read(buf));
     }
 
     public void testSkipWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         in.skip(2L);
         assertEquals('C', in.read());
     }
 
     public void testSkipWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        InputStream in = new BOMInputStream(createDataStream(data, true));
         in.skip(2L);
         assertEquals('C', in.read());
     }
 
     public void testMarkResetAfterReadWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         assertTrue(in.markSupported());
 
         in.read();
@@ -186,7 +265,7 @@ public class BOMExclusionInputStreamTest
 
     public void testMarkResetAfterReadWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        InputStream in = new BOMInputStream(createDataStream(data, true));
         assertTrue(in.markSupported());
 
         in.read();
@@ -200,7 +279,7 @@ public class BOMExclusionInputStreamTest
 
     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         assertTrue(in.markSupported());
 
         in.mark(10);
@@ -213,7 +292,7 @@ public class BOMExclusionInputStreamTest
 
     public void testMarkResetBeforeReadWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        InputStream in = new BOMInputStream(createDataStream(data, true));
         assertTrue(in.markSupported());
 
         in.mark(10);
@@ -226,20 +305,36 @@ public class BOMExclusionInputStreamTest
 
     public void testAvailableWithoutBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, false));
+        InputStream in = new BOMInputStream(createDataStream(data, false));
         assertEquals(4, in.available());
     }
 
     public void testAvailableWithBOM() throws Exception {
         byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
-        InputStream in = new BOMExclusionInputStream(createDataStream(data, true));
+        InputStream in = new BOMInputStream(createDataStream(data, true));
         assertEquals(7, in.available());
     }
 
+    public void testNoBoms() throws Exception {
+        byte[] data = new byte[] { 'A', 'B', 'C' };
+        try {
+            new BOMInputStream(createDataStream(data, true), false, (ByteOrderMark[])null);
+            fail("Null BOMs, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected: constructing with a null BOM array must be rejected
+        }
+        try {
+            new BOMInputStream(createDataStream(data, true), false, new ByteOrderMark[0]);
+            fail("Empty BOMs, expected IllegalArgumentException");
+        } catch (IllegalArgumentException e) {
+            // expected: constructing with an empty BOM array must be rejected
+        }
+    }
+
     // this is here for coverage
     public void testClose() throws Exception {
         ExpectCloseInputStream del = new ExpectCloseInputStream();
-        InputStream in = new BOMExclusionInputStream(del);
+        InputStream in = new BOMInputStream(del);
 
         in.close();
         del.assertCloseCalled();