You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by se...@apache.org on 2012/03/01 17:00:33 UTC

svn commit: r1295643 - in /commons/proper/io/trunk/src: changes/changes.xml main/java/org/apache/commons/io/input/CharSequenceInputStream.java test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java

Author: sebb
Date: Thu Mar  1 16:00:32 2012
New Revision: 1295643

URL: http://svn.apache.org/viewvc?rev=1295643&view=rev
Log:
IO-297 CharSequenceInputStream to efficiently stream content of a CharSequence

Added:
    commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java   (with props)
    commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java   (with props)
Modified:
    commons/proper/io/trunk/src/changes/changes.xml

Modified: commons/proper/io/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1295643&r1=1295642&r2=1295643&view=diff
==============================================================================
--- commons/proper/io/trunk/src/changes/changes.xml (original)
+++ commons/proper/io/trunk/src/changes/changes.xml Thu Mar  1 16:00:32 2012
@@ -40,6 +40,9 @@ The <action> type attribute can be add,u
 
   <body>
     <release version="2.2" date="TBA">
+      <action dev="sebb" type="add" issue="IO-297" due-to="Oleg Kalnichevski">
+        CharSequenceInputStream to efficiently stream content of a CharSequence
+      </action>        
       <action dev="sebb" type="update" issue="IO-296" due-to="Oleg Kalnichevski">
         ReaderInputStream optimization: more efficient reading of small chunks of data
       </action>        

Added: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java?rev=1295643&view=auto
==============================================================================
--- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java (added)
+++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java Thu Mar  1 16:00:32 2012
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.io.input;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * {@link InputStream} implementation that exposes the content of a
+ * {@link CharSequence} (String, StringBuffer, StringBuilder or CharBuffer)
+ * as a stream of bytes in a given character encoding.
+ * <p>
+ * Characters are encoded lazily, one internal byte buffer at a time.
+ * Malformed input and unmappable characters are replaced by the encoder's
+ * replacement bytes instead of being reported as errors.
+ * <p>
+ * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
+ * {@link #skip(long)}, {@link #available()} and the mark position are all
+ * expressed in encoded bytes, not in characters. This class performs no
+ * synchronization.
+ *
+ * @since Commons IO 2.2
+ */
+public class CharSequenceInputStream extends InputStream {
+
+    /** Value of {@link #markedOffset} while no mark is set. */
+    private static final long NO_MARK = -1;
+
+    private final CharsetEncoder encoder;
+    private final CharBuffer cbuf;
+    private final ByteBuffer bbuf;
+
+    /** Number of encoded bytes handed out (read or skipped) so far. */
+    private long offset;
+
+    /** Byte offset recorded by the last {@link #mark(int)}, or {@link #NO_MARK}. */
+    private long markedOffset;
+
+    /**
+     * Constructor.
+     *
+     * @param s the input character sequence
+     * @param charset the character set to use
+     * @param bufferSize the size, in bytes, of the internal encoding buffer;
+     *        values too small to hold one encoded character are silently
+     *        raised to the smallest workable size
+     */
+    public CharSequenceInputStream(final CharSequence s, final Charset charset, int bufferSize) {
+        super();
+        this.encoder = charset.newEncoder()
+            .onMalformedInput(CodingErrorAction.REPLACE)
+            .onUnmappableCharacter(CodingErrorAction.REPLACE);
+        // The encoder cannot make progress unless a complete character (which
+        // may be a surrogate pair, i.e. two chars) fits into the byte buffer;
+        // a smaller buffer would make the read loops spin forever.
+        final int minBufferSize = (int) Math.ceil(this.encoder.maxBytesPerChar() * 2);
+        this.bbuf = ByteBuffer.allocate(Math.max(bufferSize, minBufferSize));
+        this.bbuf.flip(); // start out empty for reading
+        this.cbuf = CharBuffer.wrap(s);
+        this.offset = 0;
+        this.markedOffset = NO_MARK;
+    }
+
+    /**
+     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}.
+     *
+     * @param s the input character sequence
+     * @param charset the character set name to use
+     * @param bufferSize the buffer size to use.
+     */
+    public CharSequenceInputStream(final CharSequence s, final String charset, int bufferSize) {
+        this(s, Charset.forName(charset), bufferSize);
+    }
+
+    /**
+     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}
+     * with a buffer size of 2048.
+     *
+     * @param s the input character sequence
+     * @param charset the character set to use
+     */
+    public CharSequenceInputStream(final CharSequence s, final Charset charset) {
+        this(s, charset, 2048);
+    }
+
+    /**
+     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, String, int)}
+     * with a buffer size of 2048.
+     *
+     * @param s the input character sequence
+     * @param charset the character set name to use
+     */
+    public CharSequenceInputStream(final CharSequence s, final String charset) {
+        this(s, charset, 2048);
+    }
+
+    /**
+     * Moves any unread bytes to the front of the byte buffer and encodes as
+     * many further characters as fit behind them.
+     *
+     * @throws IOException if the encoder reports an error
+     */
+    private void fillBuffer() throws IOException {
+        this.bbuf.compact();
+        CoderResult result = this.encoder.encode(this.cbuf, this.bbuf, true);
+        if (result.isError()) {
+            result.throwException();
+        }
+        this.bbuf.flip();
+    }
+
+    /**
+     * Makes sure at least one encoded byte is waiting in the byte buffer.
+     *
+     * @return {@code true} if a byte can be fetched from the buffer,
+     *         {@code false} if the whole sequence has been consumed
+     * @throws IOException if the encoder reports an error
+     */
+    private boolean ensureBytes() throws IOException {
+        while (!this.bbuf.hasRemaining()) {
+            if (!this.cbuf.hasRemaining()) {
+                return false;
+            }
+            fillBuffer();
+        }
+        return true;
+    }
+
+    /**
+     * Discards up to {@code n} encoded bytes.
+     *
+     * @param n the number of bytes to discard; non-positive values discard nothing
+     * @return the number of bytes actually discarded
+     * @throws IOException if the encoder reports an error
+     */
+    private long skipBytes(long n) throws IOException {
+        long skipped = 0;
+        while (skipped < n && ensureBytes()) {
+            int chunk = (int) Math.min((long) this.bbuf.remaining(), n - skipped);
+            this.bbuf.position(this.bbuf.position() + chunk);
+            skipped += chunk;
+        }
+        this.offset += skipped;
+        return skipped;
+    }
+
+    /**
+     * Reads up to {@code len} encoded bytes into {@code b}.
+     *
+     * @param b the destination array
+     * @param off the index in {@code b} of the first byte to write
+     * @param len the maximum number of bytes to read
+     * @return the number of bytes read, {@code 0} if {@code len} is zero, or
+     *         {@code -1} if the end of the sequence has been reached
+     * @throws NullPointerException if {@code b} is {@code null}
+     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
+     *         negative, or {@code off + len} exceeds {@code b.length}
+     * @throws IOException if the encoder reports an error
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (b == null) {
+            throw new NullPointerException("Byte array is null");
+        }
+        // Written as a subtraction so that off + len cannot overflow.
+        if (off < 0 || len < 0 || len > b.length - off) {
+            throw new IndexOutOfBoundsException("Array Size=" + b.length +
+                    ", offset=" + off + ", length=" + len);
+        }
+        if (len == 0) {
+            return 0; // the InputStream contract requires 0 here, even at end of stream
+        }
+        int bytesRead = 0;
+        while (bytesRead < len && ensureBytes()) {
+            int chunk = Math.min(this.bbuf.remaining(), len - bytesRead);
+            this.bbuf.get(b, off + bytesRead, chunk);
+            bytesRead += chunk;
+        }
+        this.offset += bytesRead;
+        return bytesRead == 0 ? -1 : bytesRead;
+    }
+
+    /**
+     * Reads the next encoded byte.
+     *
+     * @return the byte as a value in the range 0-255, or {@code -1} if the
+     *         end of the sequence has been reached
+     * @throws IOException if the encoder reports an error
+     */
+    @Override
+    public int read() throws IOException {
+        if (!ensureBytes()) {
+            return -1;
+        }
+        this.offset++;
+        return this.bbuf.get() & 0xFF;
+    }
+
+    /**
+     * Reads up to {@code b.length} encoded bytes into {@code b}; equivalent
+     * to {@code read(b, 0, b.length)}.
+     */
+    @Override
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Skips over up to {@code n} encoded bytes (not characters), including
+     * bytes that have already been encoded into the internal buffer.
+     *
+     * @param n the number of bytes to skip
+     * @return the number of bytes actually skipped
+     * @throws IOException if the encoder reports an error
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        return skipBytes(n);
+    }
+
+    /**
+     * Returns an estimate of the number of bytes still to be read: the bytes
+     * already encoded plus one for every character not yet encoded. The
+     * number of bytes the remaining characters really encode to may differ.
+     */
+    @Override
+    public int available() throws IOException {
+        long estimate = (long) this.bbuf.remaining() + this.cbuf.remaining();
+        return (int) Math.min(Integer.MAX_VALUE, estimate);
+    }
+
+    /** Does nothing; there are no resources to release. */
+    @Override
+    public void close() throws IOException {
+    }
+
+    /**
+     * Remembers the current byte position for a later {@link #reset()}.
+     *
+     * @param readlimit ignored; the mark never becomes invalid
+     */
+    @Override
+    public void mark(int readlimit) {
+        this.markedOffset = this.offset;
+    }
+
+    /**
+     * Returns to the position of the last {@link #mark(int)} and clears the
+     * mark. Does nothing if no mark is set.
+     *
+     * @throws IOException if the encoder reports an error
+     */
+    @Override
+    public void reset() throws IOException {
+        if (this.markedOffset != NO_MARK) {
+            final long target = this.markedOffset;
+            this.markedOffset = NO_MARK;
+            // Encode again from the very beginning and discard everything in
+            // front of the mark; this restores the encoder state and the
+            // partially consumed byte buffer without having to snapshot them.
+            this.encoder.reset();
+            this.cbuf.rewind();
+            this.bbuf.clear();
+            this.bbuf.flip();
+            this.offset = 0;
+            skipBytes(target);
+        }
+    }
+
+    @Override
+    public boolean markSupported() {
+        return true;
+    }
+
+}

Propchange: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Added: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java?rev=1295643&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java (added)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java Thu Mar  1 16:00:32 2012
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.io.input;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Random;
+
+import org.junit.Test;
+
+/**
+ * Tests for {@link CharSequenceInputStream}: the bytes it produces are
+ * compared with {@link String#getBytes(String)} for the same charset, and
+ * zero-length reads, skip, mark/reset and an unmappable-character case are
+ * exercised.
+ */
+public class CharSequenceInputStreamTest {
+
+    private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";
+    private static final String LARGE_TEST_STRING;
+
+    static {
+        StringBuilder buffer = new StringBuilder();
+        for (int i=0; i<100; i++) {
+            buffer.append(TEST_STRING);
+        }
+        LARGE_TEST_STRING = buffer.toString();
+    }
+
+    /** Source of the random offsets and lengths used by the buffered read tests. */
+    private final Random random = new Random();
+
+    /**
+     * Reads the stream one byte at a time and checks every byte, and the
+     * final end-of-stream marker, against the expected encoding.
+     */
+    private void testWithSingleByteRead(String testString, String charsetName) throws IOException {
+        byte[] bytes = testString.getBytes(charsetName);
+        InputStream in = new CharSequenceInputStream(testString, charsetName, 512);
+        try {
+            for (byte b : bytes) {
+                int read = in.read();
+                assertTrue(read >= 0);
+                assertTrue(read <= 255);
+                assertEquals(b, (byte)read);
+            }
+            assertEquals(-1, in.read());
+        } finally {
+            in.close();
+        }
+    }
+
+    /**
+     * Reads the stream in chunks of random length (possibly zero) written at
+     * random offsets and checks the bytes against the expected encoding.
+     */
+    private void testWithBufferedRead(String testString, String charsetName) throws IOException {
+        byte[] expected = testString.getBytes(charsetName);
+        InputStream in = new CharSequenceInputStream(testString, charsetName, 512);
+        try {
+            byte[] buffer = new byte[128];
+            int offset = 0;
+            while (true) {
+                int bufferOffset = random.nextInt(64);
+                int bufferLength = random.nextInt(64);
+                int read = in.read(buffer, bufferOffset, bufferLength);
+                if (read == -1) {
+                    // End of stream: every expected byte must have been seen.
+                    assertEquals(expected.length, offset);
+                    break;
+                } else {
+                    assertTrue(read <= bufferLength);
+                    while (read > 0) {
+                        assertTrue(offset < expected.length);
+                        assertEquals(expected[offset], buffer[bufferOffset]);
+                        offset++;
+                        bufferOffset++;
+                        read--;
+                    }
+                }
+            }
+        } finally {
+            in.close();
+        }
+    }
+
+    @Test
+    public void testUTF8WithSingleByteRead() throws IOException {
+        testWithSingleByteRead(TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testLargeUTF8WithSingleByteRead() throws IOException {
+        testWithSingleByteRead(LARGE_TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testUTF8WithBufferedRead() throws IOException {
+        testWithBufferedRead(TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testLargeUTF8WithBufferedRead() throws IOException {
+        testWithBufferedRead(LARGE_TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testUTF16WithSingleByteRead() throws IOException {
+        testWithSingleByteRead(TEST_STRING, "UTF-16");
+    }
+
+    @Test
+    public void testReadZero() throws Exception {
+        InputStream r = new CharSequenceInputStream("test", "UTF-8");
+        try {
+            byte[] bytes = new byte[30];
+            assertEquals(0, r.read(bytes, 0, 0));
+        } finally {
+            r.close();
+        }
+    }
+
+    /**
+     * Reading characters that the target charset cannot represent must
+     * terminate rather than loop forever.
+     */
+    @Test
+    public void testCharsetMismatchInfiniteLoop() throws IOException {
+        // Input is UTF-8 bytes: 0xE0 0xB2 0xA0
+        char[] inputChars = new char[] { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
+        // Charset charset = Charset.forName("UTF-8"); // works
+        Charset charset = Charset.forName("ASCII"); // infinite loop
+        InputStream stream = new CharSequenceInputStream(new String(inputChars), charset, 512);
+        try {
+            while (stream.read() != -1) {
+            }
+        } finally {
+            stream.close();
+        }
+    }
+
+    @Test
+    public void testSkip() throws Exception {
+        InputStream r = new CharSequenceInputStream("test", "UTF-8");
+        try {
+            r.skip(1);
+            r.skip(2);
+            assertEquals('t', r.read());
+            r.skip(100);
+            assertEquals(-1, r.read());
+        } finally {
+            r.close();
+        }
+    }
+
+    @Test
+    public void testMarkReset() throws Exception {
+        InputStream r = new CharSequenceInputStream("test", "UTF-8");
+        try {
+            r.skip(2);
+            r.mark(0);
+            assertEquals('s', r.read());
+            assertEquals('t', r.read());
+            assertEquals(-1, r.read());
+            r.reset();
+            assertEquals('s', r.read());
+            assertEquals('t', r.read());
+            assertEquals(-1, r.read());
+            // Further resets must be accepted without error.
+            r.reset();
+            r.reset();
+        } finally {
+            r.close();
+        }
+    }
+
+}

Propchange: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision