You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2004/09/29 02:24:01 UTC
cvs commit: xml-xerces/c/src/xercesc/internal XMLReader.cpp XMLReader.hpp
knoaman 2004/09/28 17:24:01
Modified: c/src/xercesc/internal XMLReader.cpp XMLReader.hpp
Log:
Performance: improve src offset calculation. Patch by Anthony O'Dowd.
Revision Changes Path
1.25 +45 -8 xml-xerces/c/src/xercesc/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- XMLReader.cpp 8 Sep 2004 13:56:14 -0000 1.24
+++ XMLReader.cpp 29 Sep 2004 00:24:01 -0000 1.25
@@ -279,6 +279,7 @@
{
// This represents no data from the source
fCharSizeBuf[fCharsAvail] = 0;
+ fCharOfsBuf[fCharsAvail] = 0;
fCharBuf[fCharsAvail++] = chSpace;
}
}
@@ -374,6 +375,7 @@
{
// This represents no data from the source
fCharSizeBuf[fCharsAvail] = 0;
+ fCharOfsBuf[fCharsAvail] = 0;
fCharBuf[fCharsAvail++] = chSpace;
}
}
@@ -401,11 +403,15 @@
// Take the current source offset and add in the sizes that we've
// eaten from the source so far.
//
- unsigned int offset = fSrcOfsBase;
- for (unsigned int index = 0; index < fCharIndex; index++)
- offset += fCharSizeBuf[index];
+ if( fCharIndex == 0 ) {
+ return fSrcOfsBase;
+ }
+
+ if( fCharIndex < fCharsAvail ) {
+ return (fSrcOfsBase + fCharOfsBuf[fCharIndex]);
+ }
- return offset;
+ return (fSrcOfsBase + fCharOfsBuf[fCharIndex-1] + fCharSizeBuf[fCharIndex-1]);
}
@@ -536,6 +542,17 @@
fCharIndex++;
}
}
+ // If there's a utf-8 BOM (0xEF 0xBB 0xBF), skip past it.
+ else {
+ const char* asChars = (const char*)fRawByteBuf;
+ if ((fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen )&&
+ (XMLString::compareNString( asChars
+ , XMLRecognizer::fgUTF8BOM
+ , XMLRecognizer::fgUTF8BOMLen) == 0) && !startInd)
+ {
+ fCharIndex += XMLRecognizer::fgUTF8BOMLen;
+ }
+ }
}
}
@@ -547,6 +564,15 @@
if (!fCharsAvail)
fNoMore = true;
+ // Calculate fCharOfsBuf using the elements from fCharSizeBuf
+ if (fCalculateSrcOfs)
+ {
+ fCharOfsBuf[0] = 0;
+ for (unsigned int index = 1; index < fCharsAvail; ++index) {
+ fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
+ }
+ }
+
return (fCharsAvail != 0);
}
@@ -1263,11 +1289,13 @@
if (fRawBytesAvail < 2)
break;
+ unsigned int postBOMIndex = 0;
const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex];
if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker))
{
fRawBufIndex += sizeof(UTF16Ch);
asUTF16++;
+ postBOMIndex = fRawBufIndex;
}
// First check that there are enough raw bytes for there to even
@@ -1275,7 +1303,7 @@
//
if (fRawBytesAvail - fRawBufIndex < XMLRecognizer::fgUTF16PreLen)
{
- fRawBufIndex = 0;
+ fRawBufIndex = postBOMIndex;
break;
}
@@ -1287,7 +1315,7 @@
{
if (memcmp(asUTF16, XMLRecognizer::fgUTF16BPre, XMLRecognizer::fgUTF16PreLen))
{
- fRawBufIndex = 0;
+ fRawBufIndex = postBOMIndex;
break;
}
}
@@ -1295,7 +1323,7 @@
{
if (memcmp(asUTF16, XMLRecognizer::fgUTF16LPre, XMLRecognizer::fgUTF16PreLen))
{
- fRawBufIndex = 0;
+ fRawBufIndex = postBOMIndex;
break;
}
}
@@ -1372,6 +1400,15 @@
//
if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
fCharBuf[fCharsAvail++] = chSpace;
+
+ // Calculate fCharOfsBuf buffer using the elements from fCharSizeBuf
+ if (fCalculateSrcOfs)
+ {
+ fCharOfsBuf[0] = 0;
+ for (unsigned int index = 1; index < fCharsAvail; ++index) {
+ fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
+ }
+ }
}
1.20 +9 -0 xml-xerces/c/src/xercesc/internal/XMLReader.hpp
Index: XMLReader.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- XMLReader.hpp 8 Sep 2004 13:56:14 -0000 1.19
+++ XMLReader.hpp 29 Sep 2004 00:24:01 -0000 1.20
@@ -16,6 +16,9 @@
/*
* $Log$
+ * Revision 1.20 2004/09/29 00:24:01 knoaman
+ * Performance: improve src offset calculation. Patch by Anthony O'Dowd.
+ *
* Revision 1.19 2004/09/08 13:56:14 peiyongz
* Apache License Version 2.0
*
@@ -424,6 +427,11 @@
// to make the internalized char fCharBuf[x]. This only contains
// useful data if fSrcOfsSupported is true.
//
+ // fCharOfsBuf
+ // This buffer is an array that contains, for each char in the
+ // fCharBuf buffer, its byte offset relative to fSrcOfsBase. It
+ // is only filled in if fCalculateSrcOfs is true.
+ //
// fCurCol
// fCurLine
// The current line and column that we are in within this reader's
@@ -548,6 +556,7 @@
XMLCh fCharBuf[kCharBufSize];
unsigned int fCharsAvail;
unsigned char fCharSizeBuf[kCharBufSize];
+ unsigned int fCharOfsBuf[kCharBufSize];
XMLSSize_t fCurCol;
XMLSSize_t fCurLine;
XMLRecognizer::Encodings fEncoding;
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org