You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by bo...@apache.org on 2008/09/17 11:31:42 UTC

svn commit: r696218 - in /xerces/c/trunk/src/xercesc/internal: DGXMLScanner.cpp IGXMLScanner.cpp ReaderMgr.hpp SGXMLScanner.cpp WFXMLScanner.cpp XMLReader.cpp XMLReader.hpp XSAXMLScanner.cpp

Author: borisk
Date: Wed Sep 17 02:31:41 2008
New Revision: 696218

URL: http://svn.apache.org/viewvc?rev=696218&view=rev
Log:
Reimplement skippedString logic to handle "short" and "long" strings separately.

Modified:
    xerces/c/trunk/src/xercesc/internal/DGXMLScanner.cpp
    xerces/c/trunk/src/xercesc/internal/IGXMLScanner.cpp
    xerces/c/trunk/src/xercesc/internal/ReaderMgr.hpp
    xerces/c/trunk/src/xercesc/internal/SGXMLScanner.cpp
    xerces/c/trunk/src/xercesc/internal/WFXMLScanner.cpp
    xerces/c/trunk/src/xercesc/internal/XMLReader.cpp
    xerces/c/trunk/src/xercesc/internal/XMLReader.hpp
    xerces/c/trunk/src/xercesc/internal/XSAXMLScanner.cpp

Modified: xerces/c/trunk/src/xercesc/internal/DGXMLScanner.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/DGXMLScanner.cpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/DGXMLScanner.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/DGXMLScanner.cpp Wed Sep 17 02:31:41 2008
@@ -605,7 +605,7 @@
     const bool isRoot = fElemStack.isEmpty();
 
     // Make sure that its the end of the element that we expect
-    if (!fReaderMgr.skippedString(tempElement->getFullName()))
+    if (!fReaderMgr.skippedStringLong(tempElement->getFullName()))
     {
         emitError
         (

Modified: xerces/c/trunk/src/xercesc/internal/IGXMLScanner.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/IGXMLScanner.cpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/IGXMLScanner.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/IGXMLScanner.cpp Wed Sep 17 02:31:41 2008
@@ -969,7 +969,7 @@
         topElem = fElemStack.topElement();
         elemName = topElem->fThisElement->getFullName();
     }
-    if (!fReaderMgr.skippedString(elemName))
+    if (!fReaderMgr.skippedStringLong(elemName))
     {
         emitError
         (

Modified: xerces/c/trunk/src/xercesc/internal/ReaderMgr.hpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/ReaderMgr.hpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/ReaderMgr.hpp (original)
+++ xerces/c/trunk/src/xercesc/internal/ReaderMgr.hpp Wed Sep 17 02:31:41 2008
@@ -94,6 +94,7 @@
     bool skippedChar(const XMLCh toSkip);
     bool skippedSpace();
     bool skippedString(const XMLCh* const toSkip);
+    bool skippedStringLong(const XMLCh* const toSkip);
     void skipQuotedString(const XMLCh quoteCh);
     XMLCh skipUntilIn(const XMLCh* const listToSkip);
     XMLCh skipUntilInOrWS(const XMLCh* const listToSkip);
@@ -346,6 +347,11 @@
     return fCurReader->skippedString(toSkip);
 }
 
+inline bool ReaderMgr::skippedStringLong(const XMLCh* const toSkip)
+{
+    return fCurReader->skippedStringLong(toSkip);
+}
+
 inline void ReaderMgr::skipToChar(const XMLCh toSkipTo)
 {
 	XMLCh nextCh = 0;

Modified: xerces/c/trunk/src/xercesc/internal/SGXMLScanner.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/SGXMLScanner.cpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/SGXMLScanner.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/SGXMLScanner.cpp Wed Sep 17 02:31:41 2008
@@ -859,7 +859,7 @@
     // Make sure that its the end of the element that we expect
     const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
     const ElemStack::StackElem* topElem = fElemStack.topElement();
-    if (!fReaderMgr.skippedString(elemName))
+    if (!fReaderMgr.skippedStringLong(elemName))
     {
         emitError
         (

Modified: xerces/c/trunk/src/xercesc/internal/WFXMLScanner.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/WFXMLScanner.cpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/WFXMLScanner.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/WFXMLScanner.cpp Wed Sep 17 02:31:41 2008
@@ -713,7 +713,7 @@
     const bool isRoot = fElemStack.isEmpty();
 
     // Make sure that its the end of the element that we expect
-    if (!fReaderMgr.skippedString(topElem->fThisElement->getFullName()))
+    if (!fReaderMgr.skippedStringLong(topElem->fThisElement->getFullName()))
     {
         emitError
         (

Modified: xerces/c/trunk/src/xercesc/internal/XMLReader.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/XMLReader.cpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/XMLReader.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/XMLReader.cpp Wed Sep 17 02:31:41 2008
@@ -471,7 +471,7 @@
 
 bool XMLReader::refreshCharBuffer()
 {
-    // If the no more flag is set, then don't both doing anything
+    // If the no more flag is set, then don't bother doing anything.
     if (fNoMore)
         return false;
 
@@ -1103,114 +1103,98 @@
     return false;
 }
 
-
 bool XMLReader::skippedString(const XMLCh* const toSkip)
 {
-    // Get the length of the string to skip
+    // This function works on strings that are smaller than kCharBufSize.
+    // This function guarantees that in case the comparison is unsuccessful
+    // the fCharIndex will point to the original data.
+    //
+
+    // Get the length of the string to skip.
+    //
     const XMLSize_t srcLen = XMLString::stringLen(toSkip);
     XMLSize_t charsLeft = charsLeftInBuffer();
 
-    if (srcLen <= fCharsAvail) {
-        //
-        //  See if the current reader has enough chars to test against this
-        //  string. If not, then ask it to reload its buffer. If that does not
-        //  get us enough, then it cannot match.
-        //
-        //  NOTE: This works because strings never have to cross a reader! And
-        //  a string to skip will never have a new line in it, so we will never
-        //  miss adjusting the current line.
-        //
-        while (charsLeft < srcLen)
-        {
-            if (!refreshCharBuffer())
-                return false;     // if the refreshCharBuf() did not add anything new
-                                  // give up and return.
-            charsLeft = charsLeftInBuffer();
-	    }
+    //  See if the current reader has enough chars to test against this
+    //  string. If not, then ask it to reload its buffer. If that does not
+    //  get us enough, then it cannot match.
+    //
+    //  NOTE: This works because strings never have to cross a reader! And
+    //  a string to skip will never have a new line in it, so we will never
+    //  miss adjusting the current line.
+    //
+    while (charsLeft < srcLen)
+    {
+      if (!refreshCharBuffer())
+        return false;
 
-        //
-        //  Ok, now we now that the current reader has enough chars in its
-        //  buffer and that its index is back at zero. So we can do a quick and
-        //  dirty comparison straight to its buffer with no requirement to unget
-        //  if it fails.
-        //
-        if (memcmp(&fCharBuf[fCharIndex], toSkip, srcLen*sizeof(XMLCh)))
-            return false;
+      XMLSize_t tmp = charsLeftInBuffer();
+      if (tmp == charsLeft) // if refreshCharBuffer() did not add anything new,
+        return false;     // give up and return.
 
-        //
-        //  And get the character buffer index back right by just adding the
-        //  source len to it.
-        //
-        fCharIndex += srcLen;
+      charsLeft = tmp;
     }
-    else {
-        if (charsLeft == 0) {
-            refreshCharBuffer();
-            charsLeft = charsLeftInBuffer();
-            if (charsLeft == 0)
-                return false; // error situation
-        }
-        if (XMLString::compareNString(&fCharBuf[fCharIndex], toSkip, charsLeft))
-            return false;
-
-        // the remaining characters of toSkip could fail so we don't want to
-        // advance fCharIndex unless we have to.
-        // the majority of the calls to this routine are for constants stringed
-        // defined in mainly XMLUni.cpp and all the strings that call it are less
-        // than 10 characters and it could be possible that the above comparison
-        // passes but one of the next one will fail and that is why we don't want
-        // to update fCharIndex. The other possibility is that it is called for
-        // the matching endtag and the string could be really long, even longer
-        // than the buffer which forces us to advance the fCharIndex position.
-        if (srcLen < kCharBufSize/4) {
-            XMLSize_t saveCharsLeft = charsLeft;
-            //fCharIndex += charsLeft;
-    
-            XMLSize_t offset = charsLeft;
-            XMLSize_t remainingLen = srcLen - charsLeft;
-
-            while (remainingLen > 0) {
-                refreshCharBuffer();
-                charsLeft = charsLeftInBuffer() - offset;
-                if (charsLeft == 0)
-                  return false; // error situation
-                if (charsLeft > remainingLen)
-                    charsLeft = remainingLen;
-                if (XMLString::compareNString(&fCharBuf[fCharIndex+saveCharsLeft], toSkip+offset, charsLeft))
-                    return false;
-                offset += charsLeft;
-                remainingLen -= charsLeft;
-                saveCharsLeft += charsLeft;
-            }
-            fCharIndex += saveCharsLeft;
 
-        }
-        else {
-            // a really long name
-            fCharIndex += charsLeft;
-    
-            XMLSize_t offset = charsLeft;
-            XMLSize_t remainingLen = srcLen - charsLeft;
-
-            while (remainingLen > 0) {
-                refreshCharBuffer();
-                charsLeft = charsLeftInBuffer();
-                if (charsLeft == 0)
-                  return false; // error situation
-                if (charsLeft > remainingLen)
-                    charsLeft = remainingLen;
-                if (XMLString::compareNString(&fCharBuf[fCharIndex], toSkip+offset, charsLeft))
-                    return false;
-                offset += charsLeft;
-                remainingLen -= charsLeft;
-                fCharIndex += charsLeft;
-            }
-        }
-    }
+    //  Ok, now we know that the current reader has enough chars in its
+    //  buffer and that its index is back at zero. So we can do a quick and
+    //  dirty comparison straight to its buffer with no requirement to unget
+    //  if it fails.
+    //
+    if (memcmp(&fCharBuf[fCharIndex], toSkip, srcLen * sizeof(XMLCh)))
+      return false;
 
-    // Add the source length to the current column to get it back right
+    // Add the source length to the current column to get it back right.
+    //
     fCurCol += (XMLFileLoc)srcLen;
 
+    //  And get the character buffer index back right by just adding the
+    //  source len to it.
+    //
+    fCharIndex += srcLen;
+
+    return true;
+}
+
+bool XMLReader::skippedStringLong(const XMLCh* toSkip)
+{
+    // This function works on strings that are potentially longer than
+    // kCharBufSize (e.g., end tag). This function does not guarantee
+    // that in case the comparison is unsuccessful the fCharIndex will
+    // point to the original data.
+    //
+
+    XMLSize_t srcLen = XMLString::stringLen(toSkip);
+    XMLSize_t charsLeft = charsLeftInBuffer();
+
+    while (srcLen != 0)
+    {
+      // Fill up the buffer with as much data as possible.
+      //
+      while (charsLeft < srcLen && charsLeft != kCharBufSize)
+      {
+        if (!refreshCharBuffer())
+          return false;
+
+        XMLSize_t tmp = charsLeftInBuffer();
+        if (tmp == charsLeft) // if refreshCharBuffer() did not add anything
+          return false;       // new, give up and return.
+
+        charsLeft = tmp;
+      }
+
+      XMLSize_t n = charsLeft < srcLen ? charsLeft : srcLen;
+
+      if (memcmp(&fCharBuf[fCharIndex], toSkip, n * sizeof(XMLCh)))
+        return false;
+
+      toSkip += n;
+      srcLen -= n;
+
+      fCharIndex += n;
+      fCurCol += (XMLFileLoc)n;
+      charsLeft -= n;
+    }
+
     return true;
 }
 

Modified: xerces/c/trunk/src/xercesc/internal/XMLReader.hpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/XMLReader.hpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/XMLReader.hpp (original)
+++ xerces/c/trunk/src/xercesc/internal/XMLReader.hpp Wed Sep 17 02:31:41 2008
@@ -183,6 +183,7 @@
     bool skippedChar(const XMLCh toSkip);
     bool skippedSpace();
     bool skippedString(const XMLCh* const toSkip);
+    bool skippedStringLong(const XMLCh* toSkip);
     bool peekString(const XMLCh* const toPeek);
 
 

Modified: xerces/c/trunk/src/xercesc/internal/XSAXMLScanner.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/XSAXMLScanner.cpp?rev=696218&r1=696217&r2=696218&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/XSAXMLScanner.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/XSAXMLScanner.cpp Wed Sep 17 02:31:41 2008
@@ -79,7 +79,7 @@
     // Make sure that its the end of the element that we expect
     const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
     const ElemStack::StackElem* topElem = fElemStack.popTop();
-    if (!fReaderMgr.skippedString(elemName))
+    if (!fReaderMgr.skippedStringLong(elemName))
     {
         emitError
         (



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org