You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/10/18 02:21:08 UTC
cvs commit: xml-xerces/c/src/internal XMLReader.cpp
andyh 00/10/17 17:21:08
Modified: c/src/internal XMLReader.cpp
Log:
Bug Fix - problems with multi-byte characters on input buffer boundaries.
Revision Changes Path
1.23 +11 -14 xml-xerces/c/src/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -r1.22 -r1.23
--- XMLReader.cpp 2000/09/08 00:57:37 1.22
+++ XMLReader.cpp 2000/10/18 00:21:07 1.23
@@ -55,7 +55,7 @@
*/
/*
- * $Id: XMLReader.cpp,v 1.22 2000/09/08 00:57:37 andyh Exp $
+ * $Id: XMLReader.cpp,v 1.23 2000/10/18 00:21:07 andyh Exp $
*/
// ---------------------------------------------------------------------------
@@ -1531,16 +1531,9 @@
{
//
// If there are any bytes left, move them down to the start. There
- // should only ever be (max bytes per char - 1) at the most. If not,
- // optimize and do the fast op and get out.
+ // should only ever be (max bytes per char - 1) at the most.
//
const unsigned int bytesLeft = fRawBytesAvail - fRawBufIndex;
- if (!bytesLeft)
- {
- // Its empty so do the optimized version
- fRawBytesAvail = fStream->readBytes(fRawByteBuf, kRawBufSize);
- return;
- }
// Move the existing ones down
for (unsigned int index = 0; index < bytesLeft; index++)
@@ -1558,7 +1551,7 @@
//
// We need to reset the buffer index back to the start in all cases,
- // since any trailing data will be copied down to the start.
+ // since any trailing data was copied down to the start.
//
fRawBufIndex = 0;
}
@@ -1579,13 +1572,17 @@
return 0;
//
- // If our raw buffer is empty, then lets load up another batch of
- // raw bytes now.
+ // If our raw buffer is low, then let's load up another batch of
+ // raw bytes now. We can't check for exactly zero bytes left because
+ // transcoding of multi-byte encodings may have left a few bytes
+ // representing a partial character in the buffer that can't be
+ // used until the next buffer (and the rest of the character)
+ // is read.
//
- if (fRawBufIndex == fRawBytesAvail)
+ unsigned int bytesLeft = fRawBytesAvail - fRawBufIndex;
+ if (bytesLeft < 100)
{
refreshRawBuffer();
- fRawBufIndex = 0;
// If we didn't get anything more just return a zero now
if (!fRawBytesAvail)