You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by pe...@apache.org on 2004/09/28 23:27:38 UTC

cvs commit: xml-xerces/c/src/xercesc/internal XMLScanner.hpp XMLScanner.cpp WFXMLScanner.cpp IGXMLScanner2.cpp DGXMLScanner.cpp

peiyongz    2004/09/28 14:27:38

  Modified:    c/src/xercesc/internal XMLScanner.hpp XMLScanner.cpp
                        WFXMLScanner.cpp IGXMLScanner2.cpp DGXMLScanner.cpp
  Log:
  Optimized duplicated attributes checking for large number of attributes
  
  Revision  Changes    Path
  1.40      +34 -0     xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
  
  Index: XMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
  retrieving revision 1.39
  retrieving revision 1.40
  diff -u -r1.39 -r1.40
  --- XMLScanner.hpp	28 Sep 2004 02:14:13 -0000	1.39
  +++ XMLScanner.hpp	28 Sep 2004 21:27:38 -0000	1.40
  @@ -16,6 +16,9 @@
   
   /*
    * $Log$
  + * Revision 1.40  2004/09/28 21:27:38  peiyongz
  + * Optimized duplicated attributes checking for large number of attributes
  + *
    * Revision 1.39  2004/09/28 02:14:13  cargilld
    * Add support for validating annotations.
    *
  @@ -731,6 +734,13 @@
       void resetUIntPool();
       void recreateUIntPool();
   
  +    inline
  +    void setAttrDupChkRegistry
  +         (
  +            const unsigned int &attrNumber
  +          ,       bool         &toUseHashTable
  +         );
  +
       // -----------------------------------------------------------------------
       //  Data members
       //
  @@ -986,6 +996,7 @@
       XMLUInt32                   fScannerId;
       XMLUInt32                   fSequenceId;
       RefVectorOf<XMLAttr>*       fAttrList;
  +    RefHash2KeysTableOf<XMLAttr>*  fAttrDupChkRegistry;    
       XMLDocumentHandler*         fDocHandler;
       DocTypeHandler*             fDocTypeHandler;
       XMLEntityHandler*           fEntityHandler;
  @@ -1519,6 +1530,29 @@
       fValidationContext->clearIdRefList();
       fValidationContext->setEntityDeclPool(0);
       fEntityDeclPoolRetrieved = false;
  +}
  +
  +inline void XMLScanner::setAttrDupChkRegistry(const unsigned int &attrNumber
  +                                            ,       bool         &toUseHashTable)
  +{
  +   // Once the attribute count exceeds 20, duplicates are checked via the hash table (created on first use, emptied on reuse)
  +    if (attrNumber > 20)
  +   {
  +        toUseHashTable = true;
  +
  +        if (!fAttrDupChkRegistry)
  +        {
  +            fAttrDupChkRegistry = new (fMemoryManager) RefHash2KeysTableOf<XMLAttr>
  +            (
  +              2*attrNumber+1, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
  +            );
  +        }
  +        else
  +        {
  +            fAttrDupChkRegistry->removeAll();
  +        }
  +    }
  +
  +}
   
   XERCES_CPP_NAMESPACE_END
  
  
  
  1.71      +4 -1      xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
  
  Index: XMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
  retrieving revision 1.70
  retrieving revision 1.71
  diff -u -r1.70 -r1.71
  --- XMLScanner.cpp	28 Sep 2004 02:14:13 -0000	1.70
  +++ XMLScanner.cpp	28 Sep 2004 21:27:38 -0000	1.71
  @@ -157,6 +157,7 @@
       , fScannerId(0)
       , fSequenceId(0)
       , fAttrList(0)
  +    , fAttrDupChkRegistry(0)
       , fDocHandler(0)
       , fDocTypeHandler(0)
       , fEntityHandler(0)
  @@ -237,6 +238,7 @@
       , fScannerId(0)
       , fSequenceId(0)
       , fAttrList(0)
  +    , fAttrDupChkRegistry(0)
       , fDocHandler(docHandler)
       , fDocTypeHandler(docTypeHandler)
       , fEntityHandler(entityHandler)
  @@ -279,6 +281,7 @@
   XMLScanner::~XMLScanner()
   {
       delete fAttrList;
  +    delete fAttrDupChkRegistry;
       delete fValidationContext;
       fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
       fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
  
  
  
  1.25      +33 -8     xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp
  
  Index: WFXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- WFXMLScanner.cpp	8 Sep 2004 13:56:13 -0000	1.24
  +++ WFXMLScanner.cpp	28 Sep 2004 21:27:38 -0000	1.25
  @@ -1457,6 +1457,13 @@
       }
   
       if(attCount) {
  +
  +        //
  +        // Decide whether to use a hash table for duplicate checking
  +        //
  +        bool toUseHashTable = false;
  +        setAttrDupChkRegistry(attCount, toUseHashTable);
  +
           // check for duplicate namespace attributes:
           // by checking for qualified names with the same local part and with prefixes 
           // which have been bound to namespace names that are identical. 
  @@ -1464,17 +1471,35 @@
           XMLAttr* curAtt;
           for (unsigned int attrIndex=0; attrIndex < attCount-1; attrIndex++) {
               loopAttr = fAttrList->elementAt(attrIndex);
  -            for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
  -                curAtt = fAttrList->elementAt(curAttrIndex);
  -                if (curAtt->getURIId() == loopAttr->getURIId() &&
  -                    XMLString::equals(curAtt->getName(), loopAttr->getName())) {
  -                    emitError
  -                    ( 
  -                        XMLErrs::AttrAlreadyUsedInSTag
  +
  +            if (!toUseHashTable)
  +            {
  +                for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
  +                    curAtt = fAttrList->elementAt(curAttrIndex);
  +                    if (curAtt->getURIId() == loopAttr->getURIId() &&
  +                        XMLString::equals(curAtt->getName(), loopAttr->getName())) {
  +                        emitError
  +                            ( 
  +                            XMLErrs::AttrAlreadyUsedInSTag
                               , curAtt->getName()
                               , elemDecl->getFullName()
  +                            );
  +                    }
  +                }
  +            }
  +            else 
  +            {
  +                if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId()))
  +                {
  +                    emitError
  +                    ( 
  +                    XMLErrs::AttrAlreadyUsedInSTag
  +                    , loopAttr->getName()
  +                    , elemDecl->getFullName()
                       );
                   }
  +
  +                fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr);
               }
           }  
       }
  
  
  
  1.72      +38 -7     xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
  
  Index: IGXMLScanner2.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
  retrieving revision 1.71
  retrieving revision 1.72
  diff -u -r1.71 -r1.72
  --- IGXMLScanner2.cpp	26 Sep 2004 18:23:50 -0000	1.71
  +++ IGXMLScanner2.cpp	28 Sep 2004 21:27:38 -0000	1.72
  @@ -110,6 +110,15 @@
       XMLBufBid bbNormal(&fBufMgr);
       XMLBuffer& normBuf = bbNormal.getBuffer();
   
  +    //
  +    // Decide whether to use a hash table for duplicate checking
  +    //
  +    bool toUseHashTable = false;
  +    if (fGrammarType == Grammar::DTDGrammarType)
  +    {
  +        setAttrDupChkRegistry(attCount, toUseHashTable);
  +    }
  +
       //  Loop through our explicitly provided attributes, which are in the raw
       //  scanned form, and build up XMLAttr objects.
       unsigned int index;
  @@ -617,16 +626,32 @@
           // by checking for qualified names with the same local part and with prefixes 
           // which have been bound to namespace names that are identical. 
           if (fGrammarType == Grammar::DTDGrammarType) {
  -            for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) {
  -                curAttr = toFill.elementAt(attrIndex);
  -                if (uriId == curAttr->getURIId() &&
  -                    XMLString::equals(suffPtr, curAttr->getName())) {
  +            if (!toUseHashTable)
  +            {
  +                for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) {
  +                    curAttr = toFill.elementAt(attrIndex);
  +                    if (uriId == curAttr->getURIId() &&
  +                        XMLString::equals(suffPtr, curAttr->getName())) {
  +                        emitError
  +                        ( 
  +
  +                         XMLErrs::AttrAlreadyUsedInSTag
  +                        , curAttr->getName()
  +                        , elemDecl->getFullName()
  +                        );
  +                    }
  +                }
  +            }
  +            else
  +            {
  +                if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId))
  +                {
                       emitError
  -                    ( 
  +                        ( 
                           XMLErrs::AttrAlreadyUsedInSTag
                           , curAttr->getName()
                           , elemDecl->getFullName()
  -                    );
  +                        );
                   }
               }  
           }
  @@ -658,6 +683,12 @@
               );
               curAttr->setSpecified(true);
           }
  +
  +        if (toUseHashTable)
  +        {
  +            fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr);
  +        }
  +
           if(psviAttr)
               psviAttr->setValue(curAttr->getValue());
               
  
  
  
  1.54      +32 -7     xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
  
  Index: DGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
  retrieving revision 1.53
  retrieving revision 1.54
  diff -u -r1.53 -r1.54
  --- DGXMLScanner.cpp	20 Sep 2004 15:00:49 -0000	1.53
  +++ DGXMLScanner.cpp	28 Sep 2004 21:27:38 -0000	1.54
  @@ -2378,6 +2378,13 @@
   void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList, int attCount, 
                                                   XMLElementDecl*       elemDecl)
   {
  +
  +    //
  +    // Decide whether to use a hash table for duplicate checking
  +    //
  +    bool toUseHashTable = false;
  +    setAttrDupChkRegistry((unsigned int&)attCount, toUseHashTable);
  +
       //  Make an initial pass through the list and find any xmlns attributes or
       //  schema attributes.
       //  When we find one, send it off to be used to update the element stack's
  @@ -2412,17 +2419,35 @@
           // by checking for qualified names with the same local part and with prefixes 
           // which have been bound to namespace names that are identical.         
           XMLAttr* loopAttr;
  -        for (int attrIndex=0; attrIndex < index; attrIndex++) {
  -            loopAttr = theAttrList->elementAt(attrIndex);
  -            if (loopAttr->getURIId() == curAttr->getURIId() &&
  -                XMLString::equals(loopAttr->getName(), curAttr->getName())) {
  +
  +        if (!toUseHashTable)
  +        {
  +            for (int attrIndex=0; attrIndex < index; attrIndex++) {
  +                loopAttr = theAttrList->elementAt(attrIndex);
  +                if (loopAttr->getURIId() == curAttr->getURIId() &&
  +                    XMLString::equals(loopAttr->getName(), curAttr->getName())) {
  +                    emitError
  +                        ( 
  +                        XMLErrs::AttrAlreadyUsedInSTag
  +                        , curAttr->getName()
  +                        , elemDecl->getFullName()
  +                        );
  +                }
  +            }
  +        }
  +        else 
  +        {
  +            if (fAttrDupChkRegistry->containsKey((void*)curAttr->getName(), curAttr->getURIId()))
  +            {
                   emitError
  -                ( 
  +                    ( 
                       XMLErrs::AttrAlreadyUsedInSTag
                       , curAttr->getName()
                       , elemDecl->getFullName()
  -                );
  +                    );
               }
  +
  +            fAttrDupChkRegistry->put((void*)curAttr->getName(), curAttr->getURIId(), curAttr);
           }                 
       }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org