You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2009/08/24 16:03:57 UTC

svn commit: r807224 - /xerces/c/branches/xerces-2/src/xercesc/validators/DTD/DTDScanner.cpp

Author: amassari
Date: Mon Aug 24 14:03:57 2009
New Revision: 807224

URL: http://svn.apache.org/viewvc?rev=807224&view=rev
Log:
Avoid recursion when parsing simply nested DTD structures. 
This issue is referenced in the following document: CVE-2009-1885 (http://nvd.nist.gov/nvd.cfm?cvename=CVE-2009-1885). 
Thanks to Jukka Taimisto, Tero Rontti and Rauli Kaksonen from the CROSS project at Codenomicon Ltd. and CERT-FI for bringing this issue to our attention.

Modified:
    xerces/c/branches/xerces-2/src/xercesc/validators/DTD/DTDScanner.cpp

Modified: xerces/c/branches/xerces-2/src/xercesc/validators/DTD/DTDScanner.cpp
URL: http://svn.apache.org/viewvc/xerces/c/branches/xerces-2/src/xercesc/validators/DTD/DTDScanner.cpp?rev=807224&r1=807223&r2=807224&view=diff
==============================================================================
--- xerces/c/branches/xerces-2/src/xercesc/validators/DTD/DTDScanner.cpp (original)
+++ xerces/c/branches/xerces-2/src/xercesc/validators/DTD/DTDScanner.cpp Mon Aug 24 14:03:57 2009
@@ -27,7 +27,9 @@
 #include <xercesc/util/FlagJanitor.hpp>
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
+#include <xercesc/util/ValueStackOf.hpp>
 #include <xercesc/util/UnexpectedEOFException.hpp>
+#include <xercesc/util/OutOfMemoryException.hpp>
 #include <xercesc/sax/InputSource.hpp>
 #include <xercesc/framework/XMLDocumentHandler.hpp>
 #include <xercesc/framework/XMLEntityHandler.hpp>
@@ -39,7 +41,6 @@
 #include <xercesc/validators/DTD/DTDEntityDecl.hpp>
 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
 #include <xercesc/validators/DTD/DTDScanner.hpp>
-#include <xercesc/util/OutOfMemoryException.hpp>
 
 XERCES_CPP_NAMESPACE_BEGIN
 
@@ -1046,338 +1047,354 @@
     // Check for a PE ref here, but don't require spaces
     checkForPERef(false, true);
 
-    // We have to check entity nesting here
-    unsigned int curReader;
-
+    ValueStackOf<XMLSize_t>* arrNestedDecl=NULL;
     //
     //  We know that the caller just saw an opening parenthesis, so we need
-    //  to parse until we hit the end of it, recursing for other nested
-    //  parentheses we see.
+    //  to parse until we hit the end of it; if we find several nested
+    //  parentheses, push them on a stack to be processed later.
     //
     //  We have to check for one up front, since it could be something like
     //  (((a)*)) etc...
     //
     ContentSpecNode* curNode = 0;
-    if (fReaderMgr->skippedChar(chOpenParen))
+    while(fReaderMgr->skippedChar(chOpenParen))
     {
-        curReader = fReaderMgr->getCurrentReaderNum();
+        // to check entity nesting
+        const unsigned int curReader = fReaderMgr->getCurrentReaderNum();
+        if(arrNestedDecl==NULL)
+            arrNestedDecl=new (fMemoryManager) ValueStackOf<XMLSize_t>(5, fMemoryManager);
+        arrNestedDecl->push(curReader);
 
-        // Lets call ourself and get back the resulting node
-        curNode = scanChildren(elemDecl, bufToUse);
+        // Check for a PE ref here, but don't require spaces
+        checkForPERef(false, true);
+    }
 
-        // If that failed, no need to go further, return failure
-        if (!curNode)
-            return 0;
+    // We must find a leaf node here, either standalone or nested in the parenthesis
+    if (!fReaderMgr->getName(bufToUse))
+    {
+        fScanner->emitError(XMLErrs::ExpectedElementName);
+        return 0;
+    }
 
-        if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
-            fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
+    //
+    //  Create a leaf node for it. If we can find the element id for
+    //  this element, then use it. Else, we have to fault in an element
+    //  decl, marked as created because of being in a content model.
+    //
+    XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
+    if (!decl)
+    {
+        decl = new (fGrammarPoolMemoryManager) DTDElementDecl
+        (
+            bufToUse.getRawBuffer()
+            , fEmptyNamespaceId
+            , DTDElementDecl::Any
+            , fGrammarPoolMemoryManager
+        );
+        decl->setCreateReason(XMLElementDecl::InContentModel);
+        decl->setExternalElemDeclaration(isReadingExternalEntity());
+        fDTDGrammar->putElemDecl(decl);
     }
-     else
+    curNode = new (fGrammarPoolMemoryManager) ContentSpecNode
+    (
+        decl->getElementName()
+        , fGrammarPoolMemoryManager
+    );
+
+    // Check for a PE ref here, but don't require spaces
+    const bool gotSpaces = checkForPERef(false, true);
+
+    // Check for a repetition character after the leaf
+    XMLCh repCh = fReaderMgr->peekNextChar();
+    ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager);
+    if (tmpNode != curNode)
     {
-        // Not a nested paren, so it must be a leaf node
-        if (!fReaderMgr->getName(bufToUse))
+        if (gotSpaces)
         {
-            fScanner->emitError(XMLErrs::ExpectedElementName);
-            return 0;
+            if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace))
+            {
+                delete tmpNode;
+            }
+            fScanner->emitError(XMLErrs::UnexpectedWhitespace);
         }
+        fReaderMgr->getNextChar();
+        curNode = tmpNode;
+    }
 
+    while(arrNestedDecl==NULL || !arrNestedDecl->empty())
+    {
+        // Check for a PE ref here, but don't require spaces
+        checkForPERef(false, true);
+    
         //
-        //  Create a leaf node for it. If we can find the element id for
-        //  this element, then use it. Else, we have to fault in an element
-        //  decl, marked as created because of being in a content model.
+        //  Ok, the next character tells us what kind of content model this
+        //  particular parenthesized section is. It's either a sequence if we
+        //  see ',', a choice if we see '|', or a single leaf node if we see
+        //  a closing paren.
+        //
+        const XMLCh opCh = fReaderMgr->peekNextChar();
+    
+        if ((opCh != chComma)
+        &&  (opCh != chPipe)
+        &&  (opCh != chCloseParen))
+        {
+            // Not a legal char, so delete our node and return failure
+            delete curNode;
+            fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf);
+            return 0;
+        }
+    
         //
-        XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
-        if (!decl)
+        //  Create the head node of the correct type. We need this to remember
+        //  the top of the local tree. If it was a single subexpr, then just
+        //  set the head node to the current node. For the others, we'll build
+        //  the tree off the second child as we move across.
+        //
+        ContentSpecNode* headNode = 0;
+        ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType;
+        if (opCh == chComma)
         {
-            decl = new (fGrammarPoolMemoryManager) DTDElementDecl
+            curType = ContentSpecNode::Sequence;
+            headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
             (
-                bufToUse.getRawBuffer()
-                , fEmptyNamespaceId
-                , DTDElementDecl::Any
+                curType
+                , curNode
+                , 0
+                , true
+                , true
                 , fGrammarPoolMemoryManager
             );
-            decl->setCreateReason(XMLElementDecl::InContentModel);
-            decl->setExternalElemDeclaration(isReadingExternalEntity());
-            fDTDGrammar->putElemDecl(decl);
+            curNode = headNode;
         }
-        curNode = new (fGrammarPoolMemoryManager) ContentSpecNode
-        (
-            decl->getElementName()
-            , fGrammarPoolMemoryManager
-        );
-
-        // Check for a PE ref here, but don't require spaces
-        const bool gotSpaces = checkForPERef(false, true);
-
-        // Check for a repetition character after the leaf
-        const XMLCh repCh = fReaderMgr->peekNextChar();
-        ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager);
-        if (tmpNode != curNode)
+         else if (opCh == chPipe)
         {
-            if (gotSpaces)
-            {
-                if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace))
-                {
-                    delete tmpNode;
-                }
-                fScanner->emitError(XMLErrs::UnexpectedWhitespace);
-            }
+            curType = ContentSpecNode::Choice;
+            headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
+            (
+                curType
+                , curNode
+                , 0
+                , true
+                , true
+                , fGrammarPoolMemoryManager
+            );
+            curNode = headNode;
+        }
+         else
+        {
+            headNode = curNode;
             fReaderMgr->getNextChar();
-            curNode = tmpNode;
         }
-    }
-
-    // Check for a PE ref here, but don't require spaces
-    checkForPERef(false, true);
 
-    //
-    //  Ok, the next character tells us what kind of content this particular
-    //  model this particular parentesized section is. Its either a choice if
-    //  we see ',', a sequence if we see '|', or a single leaf node if we see
-    //  a closing paren.
-    //
-    const XMLCh opCh = fReaderMgr->peekNextChar();
-
-    if ((opCh != chComma)
-    &&  (opCh != chPipe)
-    &&  (opCh != chCloseParen))
-    {
-        // Not a legal char, so delete our node and return failure
-        delete curNode;
-        fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf);
-        return 0;
-    }
-
-    //
-    //  Create the head node of the correct type. We need this to remember
-    //  the top of the local tree. If it was a single subexpr, then just
-    //  set the head node to the current node. For the others, we'll build
-    //  the tree off the second child as we move across.
-    //
-    ContentSpecNode* headNode = 0;
-    ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType;
-    if (opCh == chComma)
-    {
-        curType = ContentSpecNode::Sequence;
-        headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
-        (
-            curType
-            , curNode
-            , 0
-            , true
-            , true
-            , fGrammarPoolMemoryManager
-        );
-        curNode = headNode;
-    }
-     else if (opCh == chPipe)
-    {
-        curType = ContentSpecNode::Choice;
-        headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
-        (
-            curType
-            , curNode
-            , 0
-            , true
-            , true
-            , fGrammarPoolMemoryManager
-        );
-        curNode = headNode;
-    }
-     else
-    {
-        headNode = curNode;
-        fReaderMgr->getNextChar();
-    }
-
-    //
-    //  If it was a sequence or choice, we just loop until we get to the
-    //  end of our section, adding each new leaf or sub expression to the
-    //  right child of the current node, and making that new node the current
-    //  node.
-    //
-    if ((opCh == chComma) || (opCh == chPipe))
-    {
-        ContentSpecNode* lastNode = 0;
-        while (true)
+        //
+        //  If it was a sequence or choice, we just loop until we get to the
+        //  end of our section, adding each new leaf or sub expression to the
+        //  right child of the current node, and making that new node the current
+        //  node.
+        //
+        if ((opCh == chComma) || (opCh == chPipe))
         {
-            //
-            //  The next thing must either be another | or , character followed
-            //  by another leaf or subexpression, or a closing parenthesis, or a
-            //  PE ref.
-            //
-            if (fReaderMgr->lookingAtChar(chPercent))
-            {
-                checkForPERef(false, true);
-            }
-             else if (fReaderMgr->skippedSpace())
-            {
-                // Just skip whitespace
-                fReaderMgr->skipPastSpaces();
-            }
-             else if (fReaderMgr->skippedChar(chCloseParen))
+            ContentSpecNode* lastNode = 0;
+            while (true)
             {
                 //
-                //  We've hit the end of this section, so break out. But, we
-                //  need to see if we left a partial sequence of choice node
-                //  without a second node. If so, we have to undo that and
-                //  put its left child into the right node of the previous
-                //  node.
+                //  The next thing must either be another | or , character followed
+                //  by another leaf or subexpression, or a closing parenthesis, or a
+                //  PE ref.
                 //
-                if ((curNode->getType() == ContentSpecNode::Choice)
-                ||  (curNode->getType() == ContentSpecNode::Sequence))
+                if (fReaderMgr->lookingAtChar(chPercent))
+                {
+                    checkForPERef(false, true);
+                }
+                 else if (fReaderMgr->skippedSpace())
                 {
-                    if (!curNode->getSecond())
+                    // Just skip whitespace
+                    fReaderMgr->skipPastSpaces();
+                }
+                 else if (fReaderMgr->skippedChar(chCloseParen))
+                {
+                    //
+                    //  We've hit the end of this section, so break out. But, we
+                    //  need to see if we left a partial sequence or choice node
+                    //  without a second node. If so, we have to undo that and
+                    //  put its left child into the right node of the previous
+                    //  node.
+                    //
+                    if ((curNode->getType() == ContentSpecNode::Choice)
+                    ||  (curNode->getType() == ContentSpecNode::Sequence))
                     {
-                        ContentSpecNode* saveFirst = curNode->orphanFirst();
-                        lastNode->setSecond(saveFirst);
-                        curNode = lastNode;
+                        if (!curNode->getSecond())
+                        {
+                            ContentSpecNode* saveFirst = curNode->orphanFirst();
+                            lastNode->setSecond(saveFirst);
+                            curNode = lastNode;
+                        }
                     }
+                    break;
                 }
-                break;
-            }
-             else if (fReaderMgr->skippedChar(opCh))
-            {
-                // Check for a PE ref here, but don't require spaces
-                checkForPERef(false, true);
-
-                if (fReaderMgr->skippedChar(chOpenParen))
+                 else if (fReaderMgr->skippedChar(opCh))
                 {
-                    curReader = fReaderMgr->getCurrentReaderNum();
+                    // Check for a PE ref here, but don't require spaces
+                    checkForPERef(false, true);
 
-                    // Recurse to handle this new guy
-                    ContentSpecNode* subNode;
-                    try {
-                        subNode = scanChildren(elemDecl, bufToUse);
-                    }
-                    catch (const XMLErrs::Codes)
+                    if (fReaderMgr->skippedChar(chOpenParen))
                     {
-                        delete headNode;
-                        throw;
-                    }
+                        const unsigned int curReader = fReaderMgr->getCurrentReaderNum();
 
-                    // If it failed, we are done, clean up here and return failure
-                    if (!subNode)
-                    {
-                        delete headNode;
-                        return 0;
+                        // Recurse to handle this new guy
+                        ContentSpecNode* subNode;
+                        try {
+                            subNode = scanChildren(elemDecl, bufToUse);
+                        }
+                        catch (const XMLErrs::Codes)
+                        {
+                            delete headNode;
+                            throw;
+                        }
+
+                        // If it failed, we are done, clean up here and return failure
+                        if (!subNode)
+                        {
+                            delete headNode;
+                            return 0;
+                        }
+
+                        if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
+                            fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
+
+                        // Else patch it in and make it the new current
+                        ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode
+                        (
+                            curType
+                            , subNode
+                            , 0
+                            , true
+                            , true
+                            , fGrammarPoolMemoryManager
+                        );
+                        curNode->setSecond(newCur);
+                        lastNode = curNode;
+                        curNode = newCur;
                     }
+                     else
+                    {
+                        //
+                        //  Got to be a leaf node, so get a name. If we cannot get
+                        //  one, then clean up and get outa here.
+                        //
+                        if (!fReaderMgr->getName(bufToUse))
+                        {
+                            delete headNode;
+                            fScanner->emitError(XMLErrs::ExpectedElementName);
+                            return 0;
+                        }
+
+                        //
+                        //  Create a leaf node for it. If we can find the element
+                        //  id for this element, then use it. Else, we have to
+                        //  fault in an element decl, marked as created because
+                        //  of being in a content model.
+                        //
+                        XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
+                        if (!decl)
+                        {
+                            decl = new (fGrammarPoolMemoryManager) DTDElementDecl
+                            (
+                                bufToUse.getRawBuffer()
+                                , fEmptyNamespaceId
+                                , DTDElementDecl::Any
+                                , fGrammarPoolMemoryManager
+                            );
+                            decl->setCreateReason(XMLElementDecl::InContentModel);
+                            decl->setExternalElemDeclaration(isReadingExternalEntity());
+                            fDTDGrammar->putElemDecl(decl);
+                        }
 
-                    if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
-                        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
+                        ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode
+                        (
+                            decl->getElementName()
+                            , fGrammarPoolMemoryManager
+                        );
 
-                    // Else patch it in and make it the new current
-                    ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode
-                    (
-                        curType
-                        , subNode
-                        , 0
-                        , true
-                        , true
-                        , fGrammarPoolMemoryManager
-                    );
-                    curNode->setSecond(newCur);
-                    lastNode = curNode;
-                    curNode = newCur;
+                        // Check for a repetition character after the leaf
+                        const XMLCh repCh = fReaderMgr->peekNextChar();
+                        ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager);
+                        if (tmpLeaf != tmpLeaf2)
+                            fReaderMgr->getNextChar();
+
+                        //
+                        //  Create a new sequence or choice node, with the leaf
+                        //  (or rep surrounding it) we just got as its first node.
+                        //  Make the new node the second node of the current node,
+                        //  and then make it the current node.
+                        //
+                        ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode
+                        (
+                            curType
+                            , tmpLeaf2
+                            , 0
+                            , true
+                            , true
+                            , fGrammarPoolMemoryManager
+                        );
+                        curNode->setSecond(newCur);
+                        lastNode = curNode;
+                        curNode = newCur;
+                    }
                 }
                  else
                 {
-                    //
-                    //  Got to be a leaf node, so get a name. If we cannot get
-                    //  one, then clean up and get outa here.
-                    //
-                    if (!fReaderMgr->getName(bufToUse))
+                    // Cannot be valid
+                    delete headNode;  // emitError may do a throw so need to clean-up first
+                    if (opCh == chComma)
                     {
-                        delete headNode;
-                        fScanner->emitError(XMLErrs::ExpectedElementName);
-                        return 0;
+                        fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen);
                     }
-
-                    //
-                    //  Create a leaf node for it. If we can find the element
-                    //  id for this element, then use it. Else, we have to
-                    //  fault in an element decl, marked as created because
-                    //  of being in a content model.
-                    //
-                    XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
-                    if (!decl)
+                     else
                     {
-                        decl = new (fGrammarPoolMemoryManager) DTDElementDecl
+                        fScanner->emitError
                         (
-                            bufToUse.getRawBuffer()
-                            , fEmptyNamespaceId
-                            , DTDElementDecl::Any
-                            , fGrammarPoolMemoryManager
+                            XMLErrs::ExpectedSeqOrCloseParen
+                            , elemDecl.getFullName()
                         );
-                        decl->setCreateReason(XMLElementDecl::InContentModel);
-                        decl->setExternalElemDeclaration(isReadingExternalEntity());
-                        fDTDGrammar->putElemDecl(decl);
-                    }
+                    }                
+                    return 0;
+                }
+            }
+        }
 
-                    ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode
-                    (
-                        decl->getElementName()
-                        , fGrammarPoolMemoryManager
-                    );
+        //
+        //  We saw the terminating parenthesis so lets check for any repetition
+        //  character, and create a node for that, making the head node the child
+        //  of it.
+        //
+        const XMLCh repCh = fReaderMgr->peekNextChar();
+        curNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager);
+        if (curNode != headNode)
+            fReaderMgr->getNextChar();
+
+        // unwind one pending nesting level (this replaces the former recursive call)
+        if(arrNestedDecl==NULL)
+            break;
+        else
+        {
+            // If that failed, no need to go further, return failure
+            if (!curNode)
+                return 0;
 
-                    // Check for a repetition character after the leaf
-                    const XMLCh repCh = fReaderMgr->peekNextChar();
-                    ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager);
-                    if (tmpLeaf != tmpLeaf2)
-                        fReaderMgr->getNextChar();
+            const unsigned int curReader = arrNestedDecl->pop();
+            if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getValidationScheme() == XMLScanner::Val_Always)
+                fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
 
-                    //
-                    //  Create a new sequence or choice node, with the leaf
-                    //  (or rep surrounding it) we just got as its first node.
-                    //  Make the new node the second node of the current node,
-                    //  and then make it the current node.
-                    //
-                    ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode
-                    (
-                        curType
-                        , tmpLeaf2
-                        , 0
-                        , true
-                        , true
-                        , fGrammarPoolMemoryManager
-                    );
-                    curNode->setSecond(newCur);
-                    lastNode = curNode;
-                    curNode = newCur;
-                }
-            }
-             else
+            if(arrNestedDecl->empty())
             {
-                // Cannot be valid
-                delete headNode;  // emitError may do a throw so need to clean-up first
-                if (opCh == chComma)
-                {
-                    fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen);
-                }
-                 else
-                {
-                    fScanner->emitError
-                    (
-                        XMLErrs::ExpectedSeqOrCloseParen
-                        , elemDecl.getFullName()
-                    );
-                }                
-                return 0;
+                delete arrNestedDecl;
+                arrNestedDecl=NULL;
             }
         }
     }
 
-    //
-    //  We saw the terminating parenthesis so lets check for any repetition
-    //  character, and create a node for that, making the head node the child
-    //  of it.
-    //
-    XMLCh repCh = fReaderMgr->peekNextChar();
-    ContentSpecNode* retNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager);
-    if (retNode != headNode)
-        fReaderMgr->getNextChar();
-
-    return retNode;
+    return curNode;
 }
 
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org