You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2004/06/02 21:58:10 UTC
cvs commit: xml-xerces/c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner.cpp SGXMLScanner.cpp ElemStack.cpp ElemStack.hpp

neilg       2004/06/02 12:58:10

  Modified:    c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner.cpp
                        SGXMLScanner.cpp ElemStack.cpp ElemStack.hpp
  Log:
  Fix bug where scanners would accept malformed tags of the form
  <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was
  enabled.  This also opened the way for some end-tag scanning
  performance improvements.
  
  Revision  Changes    Path
  1.48      +10 -24    xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
  
  Index: DGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
  retrieving revision 1.47
  retrieving revision 1.48
  diff -u -r1.47 -r1.48
  --- DGXMLScanner.cpp	27 May 2004 16:33:07 -0000	1.47
  +++ DGXMLScanner.cpp	2 Jun 2004 19:58:09 -0000	1.48
  @@ -645,44 +645,30 @@
           ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
       }
   
  -    // After the </ is the element QName, so get a name from the input
  -    if (!fReaderMgr.getName(fQNameBuf))
  -    {
  -        // It failed so we can't really do anything with it
  -        emitError(XMLErrs::ExpectedElementName);
  -        fReaderMgr.skipPastChar(chCloseAngle);
  -        return;
  -    }
  -
  -    // Resolve element name uri if needed
  -    unsigned int uriId = fEmptyNamespaceId;
  -    const ElemStack::StackElem* topElem = fElemStack.topElement();
  -    if (fDoNamespaces)
  -    {
  -        uriId = resolvePrefix
  -        (
  -            topElem->fThisElement->getElementName()->getPrefix()
  -            , ElemStack::Mode_Element
  -        );
  -    }
  +    //  Pop the stack of the element we are supposed to be ending. Remember
  +    //  that we don't own this. The stack just keeps them and reuses them.
  +    unsigned int uriId = (fDoNamespaces)
  +        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
   
       //  Pop the stack of the element we are supposed to be ending. Remember
       //  that we don't own this. The stack just keeps them and reuses them.
  -    fElemStack.popTop();
  +    const ElemStack::StackElem* topElem = fElemStack.popTop();
  +    XMLElementDecl *tempElement = topElem->fThisElement;
   
       // See if it was the root element, to avoid multiple calls below
       const bool isRoot = fElemStack.isEmpty();
   
       // Make sure that its the end of the element that we expect
  -    if (!XMLString::equals(topElem->fThisElement->getFullName(), fQNameBuf.getRawBuffer()))
  +    if (!fReaderMgr.skippedString(tempElement->getFullName()))
       {
           emitError
           (
               XMLErrs::ExpectedEndOfTagX
  -            , topElem->fThisElement->getFullName()
  +            , tempElement->getFullName()
           );
  +        fReaderMgr.skipPastChar(chCloseAngle);
  +        return;
       }
  -
   
       // Make sure we are back on the same reader as where we started
       if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
  
  
  
  1.70      +46 -59    xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp
  
  Index: IGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v
  retrieving revision 1.69
  retrieving revision 1.70
  diff -u -r1.69 -r1.70
  --- IGXMLScanner.cpp	1 Jun 2004 16:48:13 -0000	1.69
  +++ IGXMLScanner.cpp	2 Jun 2004 19:58:09 -0000	1.70
  @@ -976,68 +976,45 @@
           ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
       }
   
  -    // After the </ is the element QName, so get a name from the input
  -    if (!fReaderMgr.getName(fQNameBuf))
  +    //  Pop the stack of the element we are supposed to be ending. Remember
  +    //  that we don't own this. The stack just keeps them and reuses them.
  +    unsigned int uriId = (fDoNamespaces)
  +        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
  +
  +    // these get initialized below
  +    const ElemStack::StackElem* topElem = 0;
  +    XMLElementDecl *tempElement = 0;
  +    XMLCh *elemName = 0;
  +
  +    // Make sure that its the end of the element that we expect
  +    // special case for schema validation, whose element decls,
  +    // obviously don't contain prefix information
  +    if(fGrammarType == Grammar::SchemaGrammarType)
  +    {
  +        elemName = fElemStack.getCurrentSchemaElemName();
  +        topElem = fElemStack.popTop(); 
  +        tempElement = topElem->fThisElement; 
  +    }
  +    else
       {
  -        // It failed so we can't really do anything with it
  -        emitError(XMLErrs::ExpectedElementName);
  -        fReaderMgr.skipPastChar(chCloseAngle);
  -        //REVISIT: Do we restore PSVI information?
  -        return;
  +        topElem = fElemStack.popTop(); 
  +        tempElement = topElem->fThisElement;
  +        elemName = (XMLCh *)tempElement->getFullName();
       }
  -
  -    unsigned int uriId = fEmptyNamespaceId;
  -    int prefixColonPos = -1;
  -    if (fDoNamespaces)
  +    if (!fReaderMgr.skippedString(elemName))
       {
  -        uriId = resolveQName
  +        emitError
           (
  -            fQNameBuf.getRawBuffer()
  -            , fPrefixBuf
  -            , ElemStack::Mode_Element
  -            , prefixColonPos
  +            XMLErrs::ExpectedEndOfTagX
  +            , elemName
           );
  +        fReaderMgr.skipPastChar(chCloseAngle);
  +        return;
       }
   
  -    //  Pop the stack of the element we are supposed to be ending. Remember
  -    //  that we don't own this. The stack just keeps them and reuses them.
  -    //
  -    //  NOTE: We CANNOT do this until we've resolved the element name because
  -    //  the element stack top contains the prefix to URI mappings for this
  -    //  element.
  -    unsigned int topUri = fElemStack.getCurrentURI();
  -    const ElemStack::StackElem* topElem = fElemStack.popTop();
  -
       // See if it was the root element, to avoid multiple calls below
       const bool isRoot = fElemStack.isEmpty();
   
  -    // Make sure that its the end of the element that we expect
  -    XMLElementDecl* tempElement = topElem->fThisElement;
  -    if (fDoNamespaces && fGrammarType == Grammar::SchemaGrammarType) {
  -
  -        // reset error occurred
  -        fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
  -        const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
  -        if ((topUri != uriId) || (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
  -        {
  -            emitError
  -            (
  -                XMLErrs::ExpectedEndOfTagX
  -                , topElem->fThisElement->getFullName()
  -            );
  -        }
  -    }
  -    else {
  -        if (!XMLString::equals(tempElement->getFullName(), fQNameBuf.getRawBuffer()))
  -        {
  -            emitError
  -            (
  -                XMLErrs::ExpectedEndOfTagX
  -                , topElem->fThisElement->getFullName()
  -            );
  -        }
  -    }
  -
       // Make sure we are back on the same reader as where we started
       if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
           emitError(XMLErrs::PartialTagMarkupError);
  @@ -1057,6 +1034,8 @@
   
       if (fGrammarType == Grammar::SchemaGrammarType)
       {
  +        // reset error occurred
  +        fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
           if (fValidate && topElem->fThisElement->isDeclared())
           {
               fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
  @@ -2372,7 +2351,6 @@
   
       const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
       const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
  -    unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
   
       // REVISIT:  since all this code only really
       // makes sense for schemas, why can DTD validation theoretically pass 
  @@ -2380,12 +2358,11 @@
       if (uriId != fEmptyNamespaceId) {
   
           // Check in current grammar before switching if necessary
  -        const XMLCh *rawQName = fQNameBuf.getRawBuffer();
           elemDecl = fGrammar->getElemDecl
           (
             uriId
             , nameRawBuf
  -          , rawQName
  +          , qnameRawBuf
             , currentScope
           );
           // may have not been declared; must look everywhere:
  @@ -2393,14 +2370,14 @@
               if(fGrammarType == Grammar::DTDGrammarType) 
               {
                   // should never occur in practice
  -                elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
  +                elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
               }
               else if (fGrammarType == Grammar::SchemaGrammarType) 
               {
                   elemDecl = fSchemaElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
               }
   
  -        if (!elemDecl && (orgGrammarUri != uriId)) {
  +        if (!elemDecl && ( fURIStringPool->getId(original_uriStr) != uriId)) {
               // not found, switch to the specified grammar
               const XMLCh* uriStr = getURIText(uriId);
               bool errorCondition = !switchGrammar(uriStr) && fValidate;
  @@ -2524,7 +2501,12 @@
                   // before we made grammars stateless:
                   elemDecl = fSchemaElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
               }
  -        if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
  +        // this is initialized correctly only if there is
  +        // no element decl.  The other uses in this scope will only
  +        // be encountered if there continues to be no element decl--which
  +        // implies that this will have been initialized correctly.
  +        unsigned orgGrammarUri = fEmptyNamespaceId;
  +        if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
               //not found, switch grammar and try globalNS
               bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
               if (errorCondition && !laxThisOne)
  @@ -2753,6 +2735,10 @@
   
       if (fGrammarType == Grammar::SchemaGrammarType) {
   
  +        // squirrel away the element's QName, so that we can do an efficient
  +        // end-tag match
  +        fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
  +
           ComplexTypeInfo* typeinfo = (fValidate)
               ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
               : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
  @@ -3527,3 +3513,4 @@
   }
   
   XERCES_CPP_NAMESPACE_END
  +
  
  
  
  1.83      +27 -41    xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
  
  Index: SGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
  retrieving revision 1.82
  retrieving revision 1.83
  diff -u -r1.82 -r1.83
  --- SGXMLScanner.cpp	27 May 2004 16:33:07 -0000	1.82
  +++ SGXMLScanner.cpp	2 Jun 2004 19:58:09 -0000	1.83
  @@ -904,53 +904,31 @@
           ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
       }
   
  -    // After the </ is the element QName, so get a name from the input
  -    if (!fReaderMgr.getName(fQNameBuf))
  -    {
  -        // It failed so we can't really do anything with it
  -        emitError(XMLErrs::ExpectedElementName);
  -        fReaderMgr.skipPastChar(chCloseAngle);
  -        return;
  -    }
  -
  -    int prefixColonPos = -1;
  -    unsigned int uriId = resolveQName
  -    (
  -        fQNameBuf.getRawBuffer()
  -        , fPrefixBuf
  -        , ElemStack::Mode_Element
  -        , prefixColonPos
  -    );
  -
       //  Pop the stack of the element we are supposed to be ending. Remember
       //  that we don't own this. The stack just keeps them and reuses them.
  -    //
  -    //  NOTE: We CANNOT do this until we've resolved the element name because
  -    //  the element stack top contains the prefix to URI mappings for this
  -    //  element.
  -    unsigned int topUri = fElemStack.getCurrentURI();
  -    const ElemStack::StackElem* topElem = fElemStack.popTop();
  -
  -    // See if it was the root element, to avoid multiple calls below
  -    const bool isRoot = fElemStack.isEmpty();
  +    unsigned int uriId = (fDoNamespaces)
  +        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
   
       // Make sure that its the end of the element that we expect
  -    XMLElementDecl* tempElement = topElem->fThisElement;
  -    const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
  -
  -    // reset error occurred
  -    fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
  -
  -    if ((topUri != uriId) ||
  -        (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
  +    XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
  +    const ElemStack::StackElem* topElem = fElemStack.popTop(); 
  +    XMLElementDecl *tempElement = topElem->fThisElement; 
  +    if (!fReaderMgr.skippedString(elemName))
       {
           emitError
           (
               XMLErrs::ExpectedEndOfTagX
  -            , topElem->fThisElement->getFullName()
  +            , elemName
           );
  +        fReaderMgr.skipPastChar(chCloseAngle);
  +        return;
       }
   
  +    // See if it was the root element, to avoid multiple calls below
  +    const bool isRoot = fElemStack.isEmpty();
  +
  +    fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
  +
       // Make sure we are back on the same reader as where we started
       if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
           emitError(XMLErrs::PartialTagMarkupError);
  @@ -1202,10 +1180,11 @@
       //  First we have to do the rawest attribute scan. We don't do any
       //  normalization of them at all, since we don't know yet what type they
       //  might be (since we need the element decl in order to do that.)
  +    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
       bool isEmpty;
       unsigned int attCount = rawAttrScan
       (
  -        fQNameBuf.getRawBuffer()
  +        qnameRawBuf
           , *fRawAttrList
           , isEmpty
       );
  @@ -1275,7 +1254,6 @@
       //  the element decl for this element. We have now update the prefix to
       //  namespace map so we should get the correct element now.
       int prefixColonPos = -1;
  -    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
       unsigned int uriId = resolveQName
       (
           qnameRawBuf
  @@ -1302,7 +1280,6 @@
       bool laxBeforeElementFound = false;
       const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
       const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
  -    unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
   
       if (uriId != fEmptyNamespaceId) {
   
  @@ -1320,7 +1297,7 @@
               // before we made grammars stateless:
               elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
           }
  -        if (!elemDecl && (orgGrammarUri != uriId)) {
  +        if (!elemDecl && ( fURIStringPool->getId(original_uriStr) != uriId)) {
               // not found, switch to the specified grammar
               const XMLCh* uriStr = getURIText(uriId);
               bool errorCondition = !switchGrammar(uriStr) && fValidate;
  @@ -1422,7 +1399,12 @@
               // before we made grammars stateless:
               elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
           }
  -        if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
  +        // this is initialized correctly only if there is
  +        // no element decl.  The other uses in this scope will only
  +        // be encountered if there continues to be no element decl--which
  +        // implies that this will have been initialized correctly.
  +        unsigned orgGrammarUri = fEmptyNamespaceId;
  +        if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
               //not found, switch grammar and try globalNS
               bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
               if (errorCondition && !laxThisOne)
  @@ -1625,6 +1607,10 @@
           if (((SchemaValidator*) fValidator)->getErrorOccurred())
               fPSVIElemContext.fErrorOccurred = true;
       }
  +
  +    // squirrel away the element's QName, so that we can do an efficient
  +    // end-tag match
  +    fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
   
       ComplexTypeInfo* typeinfo = (fValidate)
           ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
  
  
  
  1.13      +10 -1     xml-xerces/c/src/xercesc/internal/ElemStack.cpp
  
  Index: ElemStack.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ElemStack.cpp,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- ElemStack.cpp	27 Apr 2004 19:17:52 -0000	1.12
  +++ ElemStack.cpp	2 Jun 2004 19:58:10 -0000	1.13
  @@ -56,6 +56,12 @@
   
   /*
    * $Log$
  + * Revision 1.13  2004/06/02 19:58:10  neilg
  + * Fix bug where scanners would accept malformed tags of the form
  + * <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was
  + * enabled.  This also opened the way for some end-tag scanning
  + * performance improvements.
  + *
    * Revision 1.12  2004/04/27 19:17:52  peiyongz
    * XML1.0-3rd VC: element content(children) dont allow white space from
    * EntityRef/CharRef
  @@ -209,6 +215,7 @@
   
           fMemoryManager->deallocate(fStack[stackInd]->fChildren);//delete [] fStack[stackInd]->fChildren;
           fMemoryManager->deallocate(fStack[stackInd]->fMap);//delete [] fStack[stackInd]->fMap;
  +        fMemoryManager->deallocate(fStack[stackInd]->fSchemaElemName);
           delete fStack[stackInd];
       }
   
  @@ -235,6 +242,8 @@
           fStack[fStackTop]->fChildren = 0;
           fStack[fStackTop]->fMapCapacity = 0;
           fStack[fStackTop]->fMap = 0;
  +        fStack[fStackTop]->fSchemaElemName = 0;
  +        fStack[fStackTop]->fSchemaElemNameMaxLen = 0;
       }
   
       // Set up the new top row
  @@ -271,6 +280,8 @@
           fStack[fStackTop]->fChildren = 0;
           fStack[fStackTop]->fMapCapacity = 0;
           fStack[fStackTop]->fMap = 0;
  +        fStack[fStackTop]->fSchemaElemName = 0;
  +        fStack[fStackTop]->fSchemaElemNameMaxLen = 0;
       }
   
       // Set up the new top row
  
  
  
  1.10      +30 -1     xml-xerces/c/src/xercesc/internal/ElemStack.hpp
  
  Index: ElemStack.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ElemStack.hpp,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- ElemStack.hpp	27 Apr 2004 19:17:52 -0000	1.9
  +++ ElemStack.hpp	2 Jun 2004 19:58:10 -0000	1.10
  @@ -56,6 +56,12 @@
   
   /*
    * $Log$
  + * Revision 1.10  2004/06/02 19:58:10  neilg
  + * Fix bug where scanners would accept malformed tags of the form
  + * <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was
  + * enabled.  This also opened the way for some end-tag scanning
  + * performance improvements.
  + *
    * Revision 1.9  2004/04/27 19:17:52  peiyongz
    * XML1.0-3rd VC: element content(children) dont allow white space from
    * EntityRef/CharRef
  @@ -221,6 +227,8 @@
           int                 fCurrentScope;
           Grammar*            fCurrentGrammar;
           unsigned int        fCurrentURI;
  +        XMLCh *             fSchemaElemName;
  +        unsigned int        fSchemaElemNameMaxLen;
       };
   
       enum MapModes
  @@ -270,6 +278,9 @@
       void setCurrentURI(unsigned int uri);
       unsigned int getCurrentURI();
   
  +    inline void setCurrentSchemaElemName(const XMLCh * const schemaElemName);
  +    inline XMLCh *getCurrentSchemaElemName();
  +
       // -----------------------------------------------------------------------
       //  Prefix map methods
       // -----------------------------------------------------------------------
  @@ -584,6 +595,26 @@
   {
       fStack[fStackTop-1]->fReferenceEscaped = true;
       return;
  +}
  +
  +inline void ElemStack::setCurrentSchemaElemName(const XMLCh * const schemaElemName)
  +{   // Stores a copy of schemaElemName in the top stack entry, growing its buffer on demand.
  +    const unsigned int schemaElemNameLen = XMLString::stringLen(schemaElemName);
  +    StackElem* const curRow = fStack[fStackTop-1];  // assumes a non-empty stack, as before
  +    if(curRow->fSchemaElemNameMaxLen <= schemaElemNameLen)
  +    {   // Allocate first and publish pointer+capacity last, so if allocate() throws the row stays intact.
  +        const unsigned int newMaxLen = (schemaElemNameLen + 1) << 1;  // +1: a zero-length name still needs its terminator
  +        XMLCh* const newBuf = (XMLCh *)fMemoryManager->allocate(newMaxLen * sizeof(XMLCh));
  +        fMemoryManager->deallocate(curRow->fSchemaElemName);  // old buffer may be 0 on first use, as in the original
  +        curRow->fSchemaElemName = newBuf;
  +        curRow->fSchemaElemNameMaxLen = newMaxLen;
  +    }
  +    XMLString::copyString(curRow->fSchemaElemName, schemaElemName);
  +}
  +
  +inline XMLCh *ElemStack::getCurrentSchemaElemName()
  +{   // Returns the name buffer held by the top stack entry (fStack[fStackTop-1]); no copy is made.
  +    return fStack[fStackTop-1]->fSchemaElemName;  // NOTE(review): appears to be 0 until setCurrentSchemaElemName() has run on this entry — confirm
  +}
   
   inline int ElemStack::getCurrentScope()
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org