You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ro...@locus.apache.org on 2000/01/25 02:04:22 UTC
cvs commit: xml-xerces/c/src/internal ReaderMgr.cpp ReaderMgr.hpp XMLReader.cpp XMLReader.hpp XMLScanner2.cpp
roddey 00/01/24 17:04:22
Modified: c/src/internal ReaderMgr.cpp ReaderMgr.hpp XMLReader.cpp
XMLReader.hpp XMLScanner2.cpp
Log:
Fixes a bogus error about ]]> in char data.
Revision Changes Path
1.6 +6 -32 xml-xerces/c/src/internal/ReaderMgr.cpp
Index: ReaderMgr.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/ReaderMgr.cpp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- ReaderMgr.cpp 2000/01/15 01:26:16 1.5
+++ ReaderMgr.cpp 2000/01/25 01:04:21 1.6
@@ -56,6 +56,9 @@
/**
* $Log: ReaderMgr.cpp,v $
+ * Revision 1.6 2000/01/25 01:04:21 roddey
+ * Fixes a bogus error about ]]> in char data.
+ *
* Revision 1.5 2000/01/15 01:26:16 rahulj
* Added support for HTTP to the parser using libWWW 5.2.8.
* Renamed URL.[ch]pp to XMLURL.[ch]pp and like wise for the class name.
@@ -149,38 +152,6 @@
// ---------------------------------------------------------------------------
// ReaderMgr: Scanning APIs
// ---------------------------------------------------------------------------
-XMLCh ReaderMgr::getCharData(XMLBuffer& toFill
- , XMLScanner& owningScanner
- , bool& gotLeadingSurrogate)
-{
- //
- // NOTE: We DO NOT reset the buffer here. This is an accumulation
- // method that will be called multiple times in some cases to
- // get all the contiguous char data.
- //
- // Ok, so enter the loop and get char data until we can't go no more.
- //
- XMLCh breakCh = chNull;
- while (true)
- {
- //
- // Ask the current reader for all he's got. He will return the
- // break character that caused him to break out. If its null, then
- // it just means he has no more data, so we can pop a reader and
- // keep going. Otherwise, we return with that break char.
- //
- breakCh = fCurReader->getCharData(toFill, owningScanner, gotLeadingSurrogate);
- if (breakCh)
- break;
-
- // If we cannot pop a reader, then just break out with the null char
- if (!popReader())
- break;
- }
- return breakCh;
-}
-
-
XMLCh ReaderMgr::getNextChar()
{
XMLCh chRet;
1.5 +11 -6 xml-xerces/c/src/internal/ReaderMgr.hpp
Index: ReaderMgr.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/ReaderMgr.hpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- ReaderMgr.hpp 2000/01/24 20:40:43 1.4
+++ ReaderMgr.hpp 2000/01/25 01:04:21 1.5
@@ -56,6 +56,9 @@
/**
* $Log: ReaderMgr.hpp,v $
+ * Revision 1.5 2000/01/25 01:04:21 roddey
+ * Fixes a bogus error about ]]> in char data.
+ *
* Revision 1.4 2000/01/24 20:40:43 roddey
* Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
* is not tested yet, but I wanted to get the API changes in now so that the API
@@ -137,15 +140,10 @@
// character spooling methods.
// -----------------------------------------------------------------------
bool atEOF() const;
- XMLCh getCharData
- (
- XMLBuffer& toFill
- , XMLScanner& owningScanner
- , bool& gotLeadingSurrogate
- );
bool getName(XMLBuffer& toFill);
bool getNameToken(XMLBuffer& toFill);
XMLCh getNextChar();
+ bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
void getSpaces(XMLBuffer& toFill);
void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
bool isEmpty() const;
@@ -303,6 +301,11 @@
{
toFill.reset();
return fCurReader->getName(toFill, true);
+}
+
+inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
+{
+ return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
}
inline bool ReaderMgr::getThrowEOE() const
1.6 +101 -139 xml-xerces/c/src/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- XMLReader.cpp 2000/01/22 00:01:07 1.5
+++ XMLReader.cpp 2000/01/25 01:04:21 1.6
@@ -56,6 +56,9 @@
/**
* $Log: XMLReader.cpp,v $
+ * Revision 1.6 2000/01/25 01:04:21 roddey
+ * Fixes a bogus error about ]]> in char data.
+ *
* Revision 1.5 2000/01/22 00:01:07 roddey
* Simple change to get rid of two hard coded 'x' type characters, which won't
* work on EBCDIC systems.
@@ -487,139 +490,6 @@
// ---------------------------------------------------------------------------
// XMLReader: Scanning methods
// ---------------------------------------------------------------------------
-XMLCh XMLReader::getCharData( XMLBuffer& toFill
- , XMLScanner& owningScanner
- , bool& gotLeadingSurrogate)
-{
- //
- // Ok, lets loop through the chars in the buffer until we eat them up
- // and then reload and try again.
- //
- while (true)
- {
- while (fCharIndex < fCharsAvail)
- {
- // Peek the next char from the buffer, but don't eat it yet
- XMLCh nextCh = fCharBuf[fCharIndex];
-
- //
- // Ok, lets check whether its a special character data char.
- // If not, then we handle it here. Else we break out and let
- // the caller handle it.
- //
- if (!XMLReader::isSpecialCharDataChar(nextCh))
- {
- // Eat this one
- fCharIndex++;
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
- {
- //
- // Its a leading surrogate. If we already got one, then
- // issue an error, else set leading flag to make sure that
- // we look for a trailing next time.
- //
- if (gotLeadingSurrogate)
- owningScanner.emitError(XML4CErrs::Expected2ndSurrogateChar);
- else
- gotLeadingSurrogate = true;
- }
- else
- {
- //
- // If its a trailing surrogate, make sure that we are
- // prepared for that. Else, its just a regular char so make
- // sure that we were not expected a trailing surrogate.
- //
- if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
- {
- // Its trailing, so make sure we were expecting it
- if (!gotLeadingSurrogate)
- owningScanner.emitError(XML4CErrs::Unexpected2ndSurrogateChar);
- }
- else
- {
- //
- // Its just a char, so make sure we were not expecting a
- // trailing surrogate.
- //
- if (gotLeadingSurrogate)
- owningScanner.emitError(XML4CErrs::Expected2ndSurrogateChar);
-
- // Check for valid characters here
- if (!XMLReader::isXMLChar(nextCh))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- owningScanner.emitError(XML4CErrs::InvalidCharacter, tmpBuf);
- }
- }
- gotLeadingSurrogate = false;
- }
-
- //
- // Keep the line and column info up to date and do new
- // line normalization.
- //
- if (nextCh == chCR)
- {
- fCurLine++;
- fCurCol = 1;
-
- if (fSource == Source_External)
- {
- if (fCharIndex < fCharsAvail)
- {
- if (fCharBuf[fCharIndex] == chLF)
- fCharIndex++;
- }
- else
- {
- if (refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF)
- fCharIndex++;
- }
- }
- nextCh = chLF;
- }
- }
- else if (nextCh == chLF)
- {
- fCurLine++;
- fCurCol = 1;
- }
- else
- {
- fCurCol++;
- }
-
- // Add the (possibly normalized) char to the buffer and go again
- toFill.append(nextCh);
- }
- else
- {
- return nextCh;
- }
- }
-
- // We've used up the current buffer so try to get more
- if (!refreshCharBuffer())
- break;
- }
-
- // We ate up all this reader's data
- return chNull;
-}
-
-
bool XMLReader::getName(XMLBuffer& toFill, const bool token)
{
//
@@ -694,25 +564,116 @@
{
// If fNoMore is set, then we have nothing else to give
if (fNoMore)
- {
- chGotten = XMLCh(0);
return false;
- }
// If the buffer is empty, then try to refresh
if (fCharIndex == fCharsAvail)
{
if (!refreshCharBuffer())
{
- // If still empty, then return a zero char
+ // If still empty, then return false
if (fCharIndex == fCharsAvail)
- {
- chGotten = XMLCh(0);
return false;
+ }
+ }
+
+ chGotten = fCharBuf[fCharIndex++];
+ }
+
+ // Handle end of line normalization and line/col member maintenance.
+ if (chGotten == chCR)
+ {
+ //
+ // Do the normalization. We return chLF regardless of which was
+ // found. We also eat a chCR followed by an chLF.
+ //
+ // We only do this if the content being spooled is not already
+ // internalized.
+ //
+ if (fSource == Source_External)
+ {
+ //
+ // See if we have another char left. If not, don't bother.
+ // Else, see if it's an chLF to eat. If it is, bump the
+ // index again.
+ //
+ if (fCharIndex < fCharsAvail)
+ {
+ if (fCharBuf[fCharIndex] == chLF)
+ fCharIndex++;
+ }
+ else
+ {
+ if (refreshCharBuffer())
+ {
+ if (fCharBuf[fCharIndex] == chLF)
+ fCharIndex++;
}
}
+
+ // And return just an chLF
+ chGotten = chLF;
+ }
+
+ // And handle the line/col stuff
+ fCurCol = 1;
+ fCurLine++;
+ }
+ else if (chGotten == chLF)
+ {
+ fCurLine++;
+ fCurCol = 1;
+ }
+ else if (chGotten)
+ {
+ //
+ // Only do this if it is not a null char. Null chars are not part of the
+ // real content. They are just marker characters inserted into
+ // the stream.
+ //
+ fCurCol++;
+ }
+ return true;
+}
+
+
+bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
+{
+ //
+ // See if there is at least a char in the buffer. Else, do the buffer
+ // reload logic.
+ //
+ if (fCharIndex < fCharsAvail)
+ {
+ // Check the next char
+ if (fCharBuf[fCharIndex] == chNotToGet)
+ return false;
+
+ // It's not the one we want to skip so bump the index
+ chGotten = fCharBuf[fCharIndex++];
+ }
+ else
+ {
+ // If fNoMore is set, then we have nothing else to give
+ if (fNoMore)
+ return false;
+
+ // If the buffer is empty, then try to refresh
+ if (fCharIndex == fCharsAvail)
+ {
+ if (!refreshCharBuffer())
+ {
+ // If still empty, then return false
+ if (fCharIndex == fCharsAvail)
+ return false;
+ }
}
+ // Check the next char
+ if (fCharBuf[fCharIndex] == chNotToGet)
+ return false;
+
+ // It's not the one we want to skip so bump the index
chGotten = fCharBuf[fCharIndex++];
}
1.5 +5 -6 xml-xerces/c/src/internal/XMLReader.hpp
Index: XMLReader.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.hpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- XMLReader.hpp 2000/01/22 00:01:08 1.4
+++ XMLReader.hpp 2000/01/25 01:04:21 1.5
@@ -56,6 +56,9 @@
/**
* $Log: XMLReader.hpp,v $
+ * Revision 1.5 2000/01/25 01:04:21 roddey
+ * Fixes a bogus error about ]]> in char data.
+ *
* Revision 1.4 2000/01/22 00:01:08 roddey
* Simple change to get rid of two hard coded 'x' type characters, which won't
* work on EBCDIC systems.
@@ -199,14 +202,9 @@
// -----------------------------------------------------------------------
// Scanning methods
// -----------------------------------------------------------------------
- XMLCh getCharData
- (
- XMLBuffer& toFill
- , XMLScanner& owningScanner
- , bool& gotLeadingSurrogate
- );
bool getName(XMLBuffer& toFill, const bool token);
bool getNextChar(XMLCh& chGotten);
+ bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
bool getSpaces(XMLBuffer& toFill);
bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
bool peekNextChar(XMLCh& chGotten);
1.7 +8 -20 xml-xerces/c/src/internal/XMLScanner2.cpp
Index: XMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/internal/XMLScanner2.cpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- XMLScanner2.cpp 2000/01/15 01:26:16 1.6
+++ XMLScanner2.cpp 2000/01/25 01:04:21 1.7
@@ -56,6 +56,9 @@
/**
* $Log: XMLScanner2.cpp,v $
+ * Revision 1.7 2000/01/25 01:04:21 roddey
+ * Fixes a bogus error about ]]> in char data.
+ *
* Revision 1.6 2000/01/15 01:26:16 rahulj
* Added support for HTTP to the parser using libWWW 5.2.8.
* Renamed URL.[ch]pp to XMLURL.[ch]pp and like wise for the class name.
@@ -1601,11 +1604,6 @@
{
while (true)
{
- //
- // Ok, lets get char data from the the readers until we hit
- // a special char. It returns the char that made us break out
- // of the loop. Use any second char first.
- //
if (secondCh)
{
nextCh = secondCh;
@@ -1613,18 +1611,8 @@
}
else
{
- // Reset the surrogate flag and get another block of chars
- nextCh = fReaderMgr.getCharData(toUse, *this, gotLeadingSurrogate);
-
- //
- // If we get an EOF, that's not really expected here, but it
- // could happen if there was effectively no content, so just
- // return.
- //
- // Also, if we get the open angle of some markup, then break
- // out.
- //
- if ((nextCh == chOpenAngle) || !nextCh)
+ // Try to get another char from the source
+ if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
{
// If we were waiting for a trailing surrogate, its an error
if (gotLeadingSurrogate)
@@ -1633,9 +1621,6 @@
notDone = false;
break;
}
-
- // Get the break char out of the input buffer now
- fReaderMgr.getNextChar();
}
//