You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by se...@apache.org on 2013/04/24 02:10:47 UTC
svn commit: r1471209 - in /commons/proper/io/trunk/src: changes/changes.xml
main/java/org/apache/commons/io/input/CharSequenceInputStream.java
test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
Author: sebb
Date: Wed Apr 24 00:10:46 2013
New Revision: 1471209
URL: http://svn.apache.org/r1471209
Log:
IO-356 CharSequenceInputStream#reset() behaves incorrectly in case when buffer size is not dividable by data size.
Fix code so skip relates to the encoded bytes; reset now re-encodes the data up to the point of the mark
Modified:
commons/proper/io/trunk/src/changes/changes.xml
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
Modified: commons/proper/io/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/changes/changes.xml?rev=1471209&r1=1471208&r2=1471209&view=diff
==============================================================================
--- commons/proper/io/trunk/src/changes/changes.xml (original)
+++ commons/proper/io/trunk/src/changes/changes.xml Wed Apr 24 00:10:46 2013
@@ -47,6 +47,10 @@ The <action> type attribute can be add,u
<body>
<!-- The release date is the date RC is cut -->
<release version="2.5" date="2013-??-??" description="New features and bug fixes.">
+ <action issue="IO-356" dev="sebb" type="fix">
+ CharSequenceInputStream#reset() behaves incorrectly in case when buffer size is not dividable by data size.
+ Fix code so skip relates to the encoded bytes; reset now re-encodes the data up to the point of the mark
+ </action>
<action issue="IO-379" dev="sebb" type="add">
CharSequenceInputStream - add tests for available()
Fix code so it really does reflect a minimum available.
Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java?rev=1471209&r1=1471208&r2=1471209&view=diff
==============================================================================
--- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java (original)
+++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java Wed Apr 24 00:10:46 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
+import java.nio.InvalidMarkException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
@@ -47,7 +48,8 @@ public class CharSequenceInputStream ext
private final CharBuffer cbuf;
private final ByteBuffer bbuf;
- private int mark;
+ private int mark_cbuf; // position in cbuf
+ private int mark_bbuf; // position in bbuf
/**
* Constructor.
@@ -70,7 +72,8 @@ public class CharSequenceInputStream ext
this.bbuf = ByteBuffer.allocate(bufferSize);
this.bbuf.flip();
this.cbuf = CharBuffer.wrap(cs);
- this.mark = NO_MARK;
+ this.mark_cbuf = NO_MARK;
+ this.mark_bbuf = NO_MARK;
}
/**
@@ -178,9 +181,12 @@ public class CharSequenceInputStream ext
@Override
public long skip(long n) throws IOException {
+ /*
+ * This could be made more efficient by using position to skip within the current buffer.
+ */
long skipped = 0;
- while (n > 0 && this.cbuf.hasRemaining()) {
- this.cbuf.get();
+ while (n > 0 && available() > 0) {
+ this.read();
n--;
skipped++;
}
@@ -212,16 +218,45 @@ public class CharSequenceInputStream ext
*/
@Override
public synchronized void mark(final int readlimit) {
- this.mark = this.cbuf.position();
+ this.mark_cbuf = this.cbuf.position();
+ this.mark_bbuf = this.bbuf.position();
+ this.cbuf.mark();
+ this.bbuf.mark();
+ // It would be nice to be able to use mark & reset on the cbuf and bbuf;
+ // however the bbuf is re-used so that won't work
}
@Override
public synchronized void reset() throws IOException {
- if (this.mark != NO_MARK) {
- this.cbuf.position(this.mark);
- this.mark = NO_MARK;
- this.bbuf.limit(0);
- this.encoder.reset();
+ /*
+ * This is not the most efficient implementation, as it re-encodes from the beginning.
+ *
+ * Since the bbuf is re-used, in general it's necessary to re-encode the data.
+ *
+ * It should be possible to apply some optimisations however:
+ * + use mark/reset on the cbuf and bbuf. This would only work if the buffer had not been (re)filled since the mark.
+ * The code would have to catch InvalidMarkException - does not seem possible to check if mark is valid otherwise.
+ * + Try saving the state of the cbuf before each fillBuffer; it might be possible to restart from there.
+ */
+ if (this.mark_cbuf != NO_MARK) {
+ // if cbuf is at 0, we have not started reading anything, so skip re-encoding
+ if (this.cbuf.position() != 0) {
+ this.encoder.reset();
+ this.cbuf.rewind();
+ this.bbuf.rewind();
+ this.bbuf.limit(0); // rewind does not clear the buffer
+ while(this.cbuf.position() < this.mark_cbuf) {
+ this.bbuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
+ this.bbuf.limit(0);
+ fillBuffer();
+ }
+ }
+ if (this.cbuf.position() != this.mark_cbuf) {
+ throw new IllegalStateException("Unexpected CharBuffer postion: actual="+cbuf.position() + " expected=" + this.mark_cbuf);
+ }
+ this.bbuf.position(this.mark_bbuf);
+ this.mark_cbuf = NO_MARK;
+ this.mark_bbuf = NO_MARK;
}
}
Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java?rev=1471209&r1=1471208&r2=1471209&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java (original)
+++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java Wed Apr 24 00:10:46 2013
@@ -81,7 +81,7 @@ public class CharSequenceInputStreamTest
}
}
- @Ignore // Unfortunately checking canEncode does not seem to work for all charsets:
+// Unfortunately checking canEncode does not seem to work for all charsets:
// testBufferedRead_AvailableCharset(org.apache.commons.io.input.CharSequenceInputStreamTest) Time elapsed: 0.682 sec <<< ERROR!
// java.lang.UnsupportedOperationException: null
// at java.nio.CharBuffer.array(CharBuffer.java:940)
@@ -92,7 +92,7 @@ public class CharSequenceInputStreamTest
public void testBufferedRead_AvailableCharset() throws IOException {
for (final String csName : Charset.availableCharsets().keySet()) {
// prevent java.lang.UnsupportedOperationException at sun.nio.cs.ext.ISO2022_CN.newEncoder.
- if (Charset.forName(csName).canEncode()) {
+ if (Charset.forName(csName).canEncode() && ! "COMPOUND_TEXT".equalsIgnoreCase(csName)) {
testBufferedRead(TEST_STRING, csName);
}
}
@@ -170,13 +170,11 @@ public class CharSequenceInputStreamTest
}
@Test
- @Ignore // test is broken
public void testIO_356_B10_D10_S1_UTF8() throws Exception {
testIO_356(10, 10, 1, "UTF-8");
}
@Test
- @Ignore // test is broken
public void testIO_356_B10_D10_S2_UTF8() throws Exception {
testIO_356(10, 10, 2, "UTF-8");
}
@@ -187,7 +185,6 @@ public class CharSequenceInputStreamTest
}
@Test
- @Ignore // test is broken
public void testIO_356_B10_D13_S1_UTF8() throws Exception {
testIO_356(10, 13, 1, "UTF-8");
}