You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/07/07 03:08:45 UTC
cvs commit: xml-xerces/c/src/internal ReaderMgr.hpp XMLReader.cpp XMLReader.hpp XMLScanner2.cpp
andyh 00/07/06 18:08:45
Modified: c/src/internal ReaderMgr.hpp XMLReader.cpp XMLReader.hpp
XMLScanner2.cpp
Log:
Parser speed up in scan of XML content.
Revision Changes Path
1.10 +11 -0 xml-xerces/c/src/internal/ReaderMgr.hpp
Index: ReaderMgr.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/ReaderMgr.hpp,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- ReaderMgr.hpp 2000/03/02 19:54:29 1.9
+++ ReaderMgr.hpp 2000/07/07 01:08:44 1.10
@@ -56,6 +56,9 @@
/*
* $Log: ReaderMgr.hpp,v $
+ * Revision 1.10 2000/07/07 01:08:44 andyh
+ * Parser speed up in scan of XML content.
+ *
* Revision 1.9 2000/03/02 19:54:29 roddey
* This checkin includes many changes done while waiting for the
* 1.1.0 code to be finished. I can't list them all here, but a list is
@@ -159,6 +162,7 @@
bool getNameToken(XMLBuffer& toFill);
XMLCh getNextChar();
bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
+ bool getNextPlainContentChar(XMLCh& chGotten);
void getSpaces(XMLBuffer& toFill);
void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
bool isEmpty() const;
@@ -331,6 +335,11 @@
inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
{
return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
+}
+
+inline bool ReaderMgr::getNextPlainContentChar(XMLCh& chGotten)
+{
+ return fCurReader->getNextPlainContentChar(chGotten); // forward to the current reader
}
inline bool ReaderMgr::getThrowEOE() const
1.17 +35 -0 xml-xerces/c/src/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- XMLReader.cpp 2000/07/06 21:00:51 1.16
+++ XMLReader.cpp 2000/07/07 01:08:44 1.17
@@ -56,6 +56,9 @@
/*
* $Log: XMLReader.cpp,v $
+ * Revision 1.17 2000/07/07 01:08:44 andyh
+ * Parser speed up in scan of XML content.
+ *
* Revision 1.16 2000/07/06 21:00:51 jpolast
* inlined getNextCharIfNot() for better performance
*
@@ -1815,6 +1818,38 @@
// ---------------------------------------------------------------------------
// XMLReader: Static data
// ---------------------------------------------------------------------------
+
+ // Plain Content Characters. These are legal content characters that need no
+ // special handling or checks at all. They do not include CR, NL, <, & or ],
+ // nor any C0 control character (0x00-0x1F), most of which are not legal XML.
+ // This really wants to be merged into the main table below, although this
+ // would require some thought and reorganization.
+ //
+ // Note: This is generated by hand, not programmatically like
+ // fgCharCharsTable, below.
+ //
+const XMLByte XMLReader::fgPlainContentChars[256] =
+{
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ /*0*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /*1*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /*2*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*3*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01,
+ /*4*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*5*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01,
+ /*6*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*7*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*8*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*9*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*A*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*B*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*C*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*D*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*E*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ /*F*/ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01
+};
+
+
const XMLByte XMLReader::fgCharCharsTable[0x10000] =
{
0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD0, 0xD0, 0x00, 0x00, 0xD0, 0x00, 0x00
1.11 +47 -1 xml-xerces/c/src/internal/XMLReader.hpp
Index: XMLReader.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.hpp,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- XMLReader.hpp 2000/07/06 21:00:52 1.10
+++ XMLReader.hpp 2000/07/07 01:08:44 1.11
@@ -56,6 +56,9 @@
/*
* $Log: XMLReader.hpp,v $
+ * Revision 1.11 2000/07/07 01:08:44 andyh
+ * Parser speed up in scan of XML content.
+ *
* Revision 1.10 2000/07/06 21:00:52 jpolast
* inlined getNextCharIfNot() for better performance
*
@@ -232,6 +235,7 @@
bool getName(XMLBuffer& toFill, const bool token);
bool getNextChar(XMLCh& chGotten);
bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
+ bool getNextPlainContentChar(XMLCh& chGotten);
bool getSpaces(XMLBuffer& toFill);
bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
bool peekNextChar(XMLCh& chGotten);
@@ -503,6 +507,16 @@
// code and then hard coded into the cpp file for speed.
// -----------------------------------------------------------------------
static const XMLByte fgCharCharsTable[0x10000];
+
+ //------------------------------------------------------------------------
+ //
+ // Another character property table, to speed the handling of plain character data.
+ //
+ // ToDo: Figure out an efficient way to merge with the main character
+ // properties table.
+ //
+ // -----------------------------------------------------------------------
+ static const XMLByte fgPlainContentChars[256];
};
@@ -634,10 +648,42 @@
}
+
// ---------------------------------------------------------------------------
-// XMLReader: getNextCharIfNot() method inlined for speed
+//
+// XMLReader: getNextPlainContentChar() method inlined for speed
+//
// ---------------------------------------------------------------------------
+inline bool XMLReader::getNextPlainContentChar(XMLCh& chGotten)
+{
+ //
+ // Fast path: if the next buffered char is plain content, consume it,
+ // store it in chGotten and return true. Otherwise (or if the buffer is
+ // empty) return false; the more general getNextChar will then succeed.
+ //
+ if (fCharIndex >= fCharsAvail)
+ {
+ return false;
+ }
+
+ // The table has exactly 256 entries, so bound the index to 0x00-0xFF.
+ XMLCh c = fCharBuf[fCharIndex];
+ if (c < 256 && XMLReader::fgPlainContentChars[c])
+ {
+ fCharIndex++;
+ chGotten = c;
+ return true;
+ }
+
+ return false;
+}
+
+
+
+// ---------------------------------------------------------------------------
+// XMLReader: getNextCharIfNot() method inlined for speed
+// ---------------------------------------------------------------------------
inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
{
//
1.14 +14 -0 xml-xerces/c/src/internal/XMLScanner2.cpp
Index: XMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLScanner2.cpp,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- XMLScanner2.cpp 2000/05/11 23:11:33 1.13
+++ XMLScanner2.cpp 2000/07/07 01:08:44 1.14
@@ -56,6 +56,9 @@
/*
* $Log: XMLScanner2.cpp,v $
+ * Revision 1.14 2000/07/07 01:08:44 andyh
+ * Parser speed up in scan of XML content.
+ *
* Revision 1.13 2000/05/11 23:11:33 andyh
* Add missing validity checks for stand-alone documents, character range
* and Well-formed parsed entities. Changes contributed by Sean MacRoibeaird
@@ -1662,6 +1665,15 @@
}
else
{
+ // Eat through as many plain content characters as possible without
+ // needing special handling
+ //
+ if (curState == State_Waiting && !gotLeadingSurrogate)
+ {
+ while (fReaderMgr.getNextPlainContentChar(nextCh))
+ toUse.append(nextCh);
+ }
+
// Try to get another char from the source
if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
{