You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by mr...@apache.org on 2007/07/06 23:56:15 UTC

svn commit: r554069 - /xerces/java/trunk/src/org/apache/xerces/impl/io/UTF8Reader.java

Author: mrglavas
Date: Fri Jul  6 14:56:14 2007
New Revision: 554069

URL: http://svn.apache.org/viewvc?view=rev&rev=554069
Log:
Fixing JIRA Issue #1257:
http://issues.apache.org/jira/browse/XERCESJ-1257

If the first byte of a four-byte UTF-8 sequence lands on a buffer boundary, the low surrogate
will not fit into the char buffer. It needs to be saved for the next read. An AIOOBE
(ArrayIndexOutOfBoundsException) was being thrown before because we were not checking for
this edge case.

Modified:
    xerces/java/trunk/src/org/apache/xerces/impl/io/UTF8Reader.java

Modified: xerces/java/trunk/src/org/apache/xerces/impl/io/UTF8Reader.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/io/UTF8Reader.java?view=diff&rev=554069&r1=554068&r2=554069
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/io/UTF8Reader.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/io/UTF8Reader.java Fri Jul  6 14:56:14 2007
@@ -286,22 +286,21 @@
      */
     public int read(char ch[], int offset, int length) throws IOException {
 
-        // handle surrogate
-        int out = offset;
-        if (fSurrogate != -1) {
-            ch[offset + 1] = (char)fSurrogate;
-            fSurrogate = -1;
-            length--;
-            out++;
-        }
-
         // read bytes
+        int out = offset;
         int count = 0;
         if (fOffset == 0) {
             // adjust length to read
             if (length > fBuffer.length) {
                 length = fBuffer.length;
             }
+            
+            // handle surrogate
+            if (fSurrogate != -1) {
+                ch[out++] = (char)fSurrogate;
+                fSurrogate = -1;
+                length--;
+            }
 
             // perform read operation
             count = fInputStream.read(fBuffer, 0, length);
@@ -548,8 +547,14 @@
 
                 // set characters
                 ch[out++] = (char)hs;
-                ch[out++] = (char)ls;
-                count -= 2;
+                if ((count -= 2) <= length) {
+                    ch[out++] = (char)ls;
+                }
+                // reached the end of the char buffer; save low surrogate for the next read
+                else {
+                    fSurrogate = ls;
+                    --count;
+                }
                 continue;
             }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org