You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/10/18 02:21:08 UTC

cvs commit: xml-xerces/c/src/internal XMLReader.cpp

andyh       00/10/17 17:21:08

  Modified:    c/src/internal XMLReader.cpp
  Log:
  Bug Fix - problems with multi-byte characters on input buffer boundaries.
  
  Revision  Changes    Path
  1.23      +11 -14    xml-xerces/c/src/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
  retrieving revision 1.22
  retrieving revision 1.23
  diff -u -r1.22 -r1.23
  --- XMLReader.cpp	2000/09/08 00:57:37	1.22
  +++ XMLReader.cpp	2000/10/18 00:21:07	1.23
  @@ -55,7 +55,7 @@
    */
   
   /*
  - * $Id: XMLReader.cpp,v 1.22 2000/09/08 00:57:37 andyh Exp $
  + * $Id: XMLReader.cpp,v 1.23 2000/10/18 00:21:07 andyh Exp $
    */
   
   // ---------------------------------------------------------------------------
  @@ -1531,16 +1531,9 @@
   {
       //
       //  If there are any bytes left, move them down to the start. There
  -    //  should only ever be (max bytes per char - 1) at the most. If not,
  -    //  optimize and do the fast op and get out.
  +    //  should only ever be (max bytes per char - 1) at the most. 
       //
       const unsigned int bytesLeft = fRawBytesAvail - fRawBufIndex;
  -    if (!bytesLeft)
  -    {
  -        // Its empty so do the optimized version
  -        fRawBytesAvail = fStream->readBytes(fRawByteBuf, kRawBufSize);
  -        return;
  -    }
   
       // Move the existing ones down
       for (unsigned int index = 0; index < bytesLeft; index++)
  @@ -1558,7 +1551,7 @@
   
       //
       //  We need to reset the buffer index back to the start in all cases,
  -    //  since any trailing data will be copied down to the start.
  +    //  since any trailing data was copied down to the start.
       //
       fRawBufIndex = 0;
   }
  @@ -1579,13 +1572,17 @@
           return 0;
   
       //
  -    //  If our raw buffer is empty, then lets load up another batch of
  -    //  raw bytes now.
  +    //  If our raw buffer is low, then let's load up another batch of
  +    //  raw bytes now.  We can't check for exactly zero bytes left because
  +    //  transcoding of multi-byte encodings may have left a few bytes
  +    //  representing a partial character in the buffer that can't be
  +    //  used until the next buffer (and the rest of the character)
  +    //  is read.
       //
  -    if (fRawBufIndex == fRawBytesAvail)
  +    unsigned int bytesLeft = fRawBytesAvail - fRawBufIndex;
  +    if (bytesLeft < 100)
       {
           refreshRawBuffer();
  -        fRawBufIndex = 0;
   
           // If we didn't get anything more just return a zero now
           if (!fRawBytesAvail)