You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by se...@apache.org on 2013/04/24 02:10:47 UTC

svn commit: r1471209 - in /commons/proper/io/trunk/src: changes/changes.xml main/java/org/apache/commons/io/input/CharSequenceInputStream.java test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java

Author: sebb
Date: Wed Apr 24 00:10:46 2013
New Revision: 1471209

URL: http://svn.apache.org/r1471209
Log:
IO-356 CharSequenceInputStream#reset() behaves incorrectly in case when buffer size is not dividable by data size.
       Fix code so skip relates to the encoded bytes; reset now re-encodes the data up to the point of the mark

Modified:
    commons/proper/io/trunk/src/changes/changes.xml
    commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
    commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java

Modified: commons/proper/io/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1471209&r1=1471208&r2=1471209&view=diff
==============================================================================
--- commons/proper/io/trunk/src/changes/changes.xml (original)
+++ commons/proper/io/trunk/src/changes/changes.xml Wed Apr 24 00:10:46 2013
@@ -47,6 +47,10 @@ The <action> type attribute can be add,u
   <body>
     <!-- The release date is the date RC is cut -->
     <release version="2.5" date="2013-??-??" description="New features and bug fixes.">    
+      <action issue="IO-356" dev="sebb" type="fix">
+         CharSequenceInputStream#reset() behaves incorrectly in case when buffer size is not dividable by data size.
+         Fix code so skip relates to the encoded bytes; reset now re-encodes the data up to the point of the mark
+      </action>
       <action issue="IO-379" dev="sebb" type="add">
          CharSequenceInputStream - add tests for available()
          Fix code so it really does reflect a minimum available.

Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java?rev=1471209&r1=1471208&r2=1471209&view=diff
==============================================================================
--- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java (original)
+++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java Wed Apr 24 00:10:46 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
+import java.nio.InvalidMarkException;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
@@ -47,7 +48,8 @@ public class CharSequenceInputStream ext
     private final CharBuffer cbuf;
     private final ByteBuffer bbuf;
 
-    private int mark;
+    private int mark_cbuf; // position in cbuf
+    private int mark_bbuf; // position in bbuf
     
     /**
      * Constructor.
@@ -70,7 +72,8 @@ public class CharSequenceInputStream ext
         this.bbuf = ByteBuffer.allocate(bufferSize);
         this.bbuf.flip();
         this.cbuf = CharBuffer.wrap(cs);
-        this.mark = NO_MARK;
+        this.mark_cbuf = NO_MARK;
+        this.mark_bbuf = NO_MARK;
     }
 
     /**
@@ -178,9 +181,12 @@ public class CharSequenceInputStream ext
 
     @Override
     public long skip(long n) throws IOException {
+        /*
+         * This could be made more efficient by using position to skip within the current buffer.
+         */
         long skipped = 0;
-        while (n > 0 && this.cbuf.hasRemaining()) {
-            this.cbuf.get();
+        while (n > 0 && available() > 0) {
+            this.read();
             n--;
             skipped++;
         }
@@ -212,16 +218,45 @@ public class CharSequenceInputStream ext
      */
     @Override
     public synchronized void mark(final int readlimit) {
-        this.mark = this.cbuf.position();
+        this.mark_cbuf = this.cbuf.position();
+        this.mark_bbuf = this.bbuf.position();
+        this.cbuf.mark();
+        this.bbuf.mark();
+        // It would be nice to be able to use mark & reset on the cbuf and bbuf;
+        // however the bbuf is re-used so that won't work
     }
 
     @Override
     public synchronized void reset() throws IOException {
-        if (this.mark != NO_MARK) {
-            this.cbuf.position(this.mark);
-            this.mark = NO_MARK;
-            this.bbuf.limit(0);
-            this.encoder.reset();
+        /*
+         * This is not the most efficient implementation, as it re-encodes from the beginning.
+         *
+         * Since the bbuf is re-used, in general it's necessary to re-encode the data.
+         *
+         * It should be possible to apply some optimisations however:
+         * + use mark/reset on the cbuf and bbuf. This would only work if the buffer had not been (re)filled since the mark.
+         * The code would have to catch InvalidMarkException - does not seem possible to check if mark is valid otherwise.
+         * + Try saving the state of the cbuf before each fillBuffer; it might be possible to restart from there.
+         */
+        if (this.mark_cbuf != NO_MARK) {
+            // if cbuf is at 0, we have not started reading anything, so skip re-encoding
+            if (this.cbuf.position() != 0) {
+                this.encoder.reset();
+                this.cbuf.rewind();
+                this.bbuf.rewind();
+                this.bbuf.limit(0); // rewind does not clear the buffer
+                while(this.cbuf.position() < this.mark_cbuf) {
+                    this.bbuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
+                    this.bbuf.limit(0);
+                    fillBuffer();
+                }
+            }
+            if (this.cbuf.position() != this.mark_cbuf) {
+                throw new IllegalStateException("Unexpected CharBuffer postion: actual="+cbuf.position() + " expected=" + this.mark_cbuf);
+            }
+            this.bbuf.position(this.mark_bbuf);                
+            this.mark_cbuf = NO_MARK;
+            this.mark_bbuf = NO_MARK;
         }
     }
 

Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java?rev=1471209&r1=1471208&r2=1471209&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java Wed Apr 24 00:10:46 2013
@@ -81,7 +81,7 @@ public class CharSequenceInputStreamTest
         }
     }
 
-    @Ignore //    Unfortunately checking canEncode does not seem to work for all charsets:
+//    Unfortunately checking canEncode does not seem to work for all charsets:
 //    testBufferedRead_AvailableCharset(org.apache.commons.io.input.CharSequenceInputStreamTest)  Time elapsed: 0.682 sec  <<< ERROR!
 //    java.lang.UnsupportedOperationException: null
 //        at java.nio.CharBuffer.array(CharBuffer.java:940)
@@ -92,7 +92,7 @@ public class CharSequenceInputStreamTest
     public void testBufferedRead_AvailableCharset() throws IOException {
         for (final String csName : Charset.availableCharsets().keySet()) {
             // prevent java.lang.UnsupportedOperationException at sun.nio.cs.ext.ISO2022_CN.newEncoder. 
-            if (Charset.forName(csName).canEncode()) {
+            if (Charset.forName(csName).canEncode() && ! "COMPOUND_TEXT".equalsIgnoreCase(csName)) {
                 testBufferedRead(TEST_STRING, csName);
             }
         }
@@ -170,13 +170,11 @@ public class CharSequenceInputStreamTest
     }
 
     @Test
-    @Ignore // test is broken
     public void testIO_356_B10_D10_S1_UTF8() throws Exception {
         testIO_356(10, 10, 1, "UTF-8");
     }
 
     @Test
-    @Ignore // test is broken
     public void testIO_356_B10_D10_S2_UTF8() throws Exception {
         testIO_356(10, 10, 2, "UTF-8");
     }
@@ -187,7 +185,6 @@ public class CharSequenceInputStreamTest
     }
 
     @Test
-    @Ignore // test is broken
     public void testIO_356_B10_D13_S1_UTF8() throws Exception {
         testIO_356(10, 13, 1, "UTF-8");
     }