You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2003/11/24 06:09:39 UTC

cvs commit: xml-xerces/c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp SGXMLScanner.cpp SGXMLScanner.hpp IGXMLScanner.cpp IGXMLScanner.hpp IGXMLScanner2.cpp DGXMLScanner.cpp DGXMLScanner.hpp

neilg       2003/11/23 21:09:39

  Modified:    c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp
                        SGXMLScanner.cpp SGXMLScanner.hpp IGXMLScanner.cpp
                        IGXMLScanner.hpp IGXMLScanner2.cpp DGXMLScanner.cpp
                        DGXMLScanner.hpp
  Log:
  implement new, stateless, method for detecting duplicate attributes
  
  Revision  Changes    Path
  1.54      +82 -1     xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
  
  Index: XMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
  retrieving revision 1.53
  retrieving revision 1.54
  diff -u -r1.53 -r1.54
  --- XMLScanner.cpp	13 Nov 2003 15:00:44 -0000	1.53
  +++ XMLScanner.cpp	24 Nov 2003 05:09:38 -0000	1.54
  @@ -224,6 +224,10 @@
       , fPrefixBuf(1023, manager)
       , fURIBuf(1023, manager)
       , fElemStack(manager)
  +    , fUIntPool(0)
  +    , fUIntPoolRow(0)
  +    , fUIntPoolCol(0)
  +    , fUIntPoolRowTotal(2)
   {
      commonInit();
   
  @@ -295,6 +299,10 @@
       , fPrefixBuf(1023, manager)
       , fURIBuf(1023, manager)
       , fElemStack(manager)
  +    , fUIntPool(0)
  +    , fUIntPoolRow(0)
  +    , fUIntPoolCol(0)
  +    , fUIntPoolRowTotal(2)
   {
      commonInit();
   
  @@ -311,6 +319,12 @@
       fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
       fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
       fMemoryManager->deallocate(fExternalNoNamespaceSchemaLocation);//delete [] fExternalNoNamespaceSchemaLocation;
  +    // delete fUIntPool
  +    for (unsigned int i=0; i<=fUIntPoolRow; i++)
  +    {
  +        fMemoryManager->deallocate(fUIntPool[i]);
  +    }
  +    fMemoryManager->deallocate(fUIntPool);
   }
   
   
  @@ -710,6 +724,8 @@
       setExternalSchemaLocation(refScanner->getExternalSchemaLocation());
       setExternalNoNamespaceSchemaLocation(refScanner->getExternalNoNamespaceSchemaLocation());
       setValidationScheme(refScanner->getValidationScheme());
  +    setSecurityManager(refScanner->getSecurityManager());
  +    setPSVIHandler(refScanner->getPSVIHandler());
   }
   
   // ---------------------------------------------------------------------------
  @@ -740,6 +756,12 @@
   
       //  Create the GrammarResolver
       //fGrammarResolver = new GrammarResolver();
  +
  +    // create initial, 64-element, fUIntPool
  +    fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) *fUIntPoolRowTotal);
  +    fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
  +    memset(fUIntPool[0], 0, sizeof(unsigned int) << 6);
  +    fUIntPool[1] = 0;
   }
   
   
  @@ -2126,6 +2148,65 @@
   {
       fReaderMgr.getUpToCharOrWS(toFill, chEndChar);
       return toFill.getLen();
  +}
  +
  +unsigned int *XMLScanner::getNewUIntPtr()
  +{
  +    // hands back the next unused slot of fUIntPool; rows are zero-filled when allocated
  +    unsigned int *retVal;
  +    if(fUIntPoolCol < 64)
  +    {
  +        retVal = fUIntPool[fUIntPoolRow]+fUIntPoolCol;
  +        fUIntPoolCol++;
  +        return retVal;
  +    }
  +    // all 64 slots of the current row are used: time to grow the pool...
  +    if(fUIntPoolRow+1 == fUIntPoolRowTotal)
  +    {
  +        // row-pointer array is full as well: double it and carry over the existing rows
  +        fUIntPoolRowTotal <<= 1;
  +        unsigned int **newArray = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal );
  +        memcpy(newArray, fUIntPool, (fUIntPoolRow+1) * sizeof(unsigned int *));
  +        fMemoryManager->deallocate(fUIntPool);
  +        fUIntPool = newArray;
  +        // null the spare row pointers; index fUIntPoolRow+1 is assigned just below
  +        for (unsigned int i=fUIntPoolRow+2; i<fUIntPoolRowTotal; i++)
  +            fUIntPool[i] = 0;
  +    }
  +    // now to add a new, zero-filled row; we just made sure we have space
  +    fUIntPoolRow++;
  +    fUIntPool[fUIntPoolRow] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
  +    memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6);
  +    // slot 0 of the new row is returned now, so the next free column is 1
  +    fUIntPoolCol = 1; 
  +    return fUIntPool[fUIntPoolRow];
  +}
  +
  +void XMLScanner::resetUIntPool()
  +{
  +    // to reuse the unsigned int pool--and the hashtables that point into it--
  +    // zero every allocated row (0..fUIntPoolRow); row/column cursors are left as-is
  +    for(unsigned int i = 0; i<= fUIntPoolRow; i++)
  +        memset(fUIntPool[i], 0, sizeof(unsigned int) << 6);
  +}
  +
  +void XMLScanner::recreateUIntPool()
  +{
  +    // dispenses with a bloated unsigned int pool and rebuilds it at its initial
  +    // size; the old rows are freed, so pointers handed out earlier become invalid
  +    // first, delete old fUIntPool: every allocated row, then the row-pointer array
  +    for (unsigned int i=0; i<=fUIntPoolRow; i++)
  +    {
  +        fMemoryManager->deallocate(fUIntPool[i]);
  +    }
  +    fMemoryManager->deallocate(fUIntPool);
  +
  +    fUIntPoolRow = fUIntPoolCol = 0;
  +    fUIntPoolRowTotal = 2;
  +    fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal);
  +    fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
  +    memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6);
  +    fUIntPool[1] = 0;
   }
   
   XERCES_CPP_NAMESPACE_END
  
  
  
  1.28      +22 -2     xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
  
  Index: XMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
  retrieving revision 1.27
  retrieving revision 1.28
  diff -u -r1.27 -r1.28
  --- XMLScanner.hpp	13 Nov 2003 15:00:44 -0000	1.27
  +++ XMLScanner.hpp	24 Nov 2003 05:09:38 -0000	1.28
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.28  2003/11/24 05:09:38  neilg
  + * implement new, stateless, method for detecting duplicate attributes
  + *
    * Revision 1.27  2003/11/13 15:00:44  peiyongz
    * Solve Compilation/Linkage error on AIX/Solaris/HP/Linux
    *
  @@ -705,6 +708,9 @@
       XMLTokens senseNextToken(unsigned int& orgReader);
       void initValidator(XMLValidator* theValidator);
       inline void resetValidationContext();
  +    unsigned int *getNewUIntPtr();
  +    void resetUIntPool();
  +    void recreateUIntPool();
   
       // -----------------------------------------------------------------------
       //  Data members
  @@ -714,7 +720,7 @@
       //      the document handler the attributes found. To make it more
       //      efficient we keep this ref vector of XMLAttr objects around. We
       //      just reuse it over and over, allowing it to grow to meet the
  -    //      peek need.
  +    //      peak need.
       //
       //  fBufMgr
       //      This is a manager for temporary buffers used during scanning.
  @@ -909,6 +915,15 @@
       //
       //  fXMLVersion
       //      Enum to indicate if the main doc is XML 1.1 or XML 1.0 conformant    
  +    //  fUIntPool
  +    //      pool of unsigned integers to help with duplicate attribute
  +    //      detection and filling in default/fixed attributes
  +    //  fUIntPoolRow
  +    //      current row in fUIntPool
  +    //  fUIntPoolCol
  +    //      current column in row
  +    //  fUIntPoolRowTotal
  +    //      total number of rows in table
       //
       //  fMemoryManager
       //      Pluggable memory manager for dynamic allocation/deallocation.
  @@ -937,6 +952,10 @@
       unsigned int                fXMLNamespaceId;
       unsigned int                fXMLNSNamespaceId;
       unsigned int                fSchemaNamespaceId;
  +    unsigned int **             fUIntPool;
  +    unsigned int                fUIntPoolRow;
  +    unsigned int                fUIntPoolCol;
  +    unsigned int                fUIntPoolRowTotal;
       XMLUInt32                   fScannerId;
       XMLUInt32                   fSequenceId;
       RefVectorOf<XMLAttr>*       fAttrList;
  @@ -1448,3 +1467,4 @@
   XERCES_CPP_NAMESPACE_END
   
   #endif
  +
  
  
  
  1.50      +136 -43   xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
  
  Index: SGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
  retrieving revision 1.49
  retrieving revision 1.50
  diff -u -r1.49 -r1.50
  --- SGXMLScanner.cpp	21 Nov 2003 22:38:19 -0000	1.49
  +++ SGXMLScanner.cpp	24 Nov 2003 05:09:38 -0000	1.50
  @@ -86,6 +86,7 @@
   #include <xercesc/validators/schema/identity/ValueStore.hpp>
   #include <xercesc/util/OutOfMemoryException.hpp>
   #include <xercesc/util/XMLResourceIdentifier.hpp>
  +#include <xercesc/util/HashPtr.hpp>
   
   XERCES_CPP_NAMESPACE_BEGIN
   
  @@ -110,6 +111,9 @@
       , fValueStoreCache(0)
       , fFieldActivator(0)
       , fElemNonDeclPool(0)
  +    , fElemCount(0)
  +    , fAttDefRegistry(0)
  +    , fUndeclaredAttrRegistryNS(0)
   {
       try
       {
  @@ -156,6 +160,9 @@
       , fValueStoreCache(0)
       , fFieldActivator(0)
       , fElemNonDeclPool(0)
  +    , fElemCount(0)
  +    , fAttDefRegistry(0)
  +    , fUndeclaredAttrRegistryNS(0)
   {
       try
       {	
  @@ -2015,6 +2022,14 @@
       fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
       fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
       fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
  +    fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashPtr(), fMemoryManager
  +    );
  +    fUndeclaredAttrRegistryNS = new (fMemoryManager) RefHash2KeysTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
  +    );
   }
   
   void SGXMLScanner::cleanUp()
  @@ -2028,6 +2043,8 @@
       delete fMatcherStack;
       delete fValueStoreCache;
       delete fElemNonDeclPool;
  +    delete fAttDefRegistry;
  +    delete fUndeclaredAttrRegistryNS;
   }
   
   void SGXMLScanner::resizeElemState() {
  @@ -2072,6 +2089,8 @@
               ? currType->resetDefs()
               : elemDecl->resetDefs();
   
  +    fElemCount++;
  +
       //  If there are no expliclitily provided attributes and there are no
       //  defined attributes for the element, the we don't have anything to do.
       //  So just return zero in this case.
  @@ -2149,6 +2168,7 @@
           //  If its not a special case namespace attr of some sort, then we
           //  do normal checking and processing.
           XMLAttDef::AttTypes attType;
  +        DatatypeValidator *attrValidator = 0;
           if (!isNSAttr)
           {
               // Some checking for attribute wild card first (for schema)
  @@ -2233,20 +2253,54 @@
               //  Find this attribute within the parent element. We pass both
               //  the uriID/name and the raw QName buffer, since we don't know
               //  how the derived validator and its elements store attributes.
  -            bool wasAdded = false;
               if (!attDef) {
  -                attDef = elemDecl->findAttr
  -                (
  -                    curPair->getKey()
  -                    , uriId
  -                    , suffPtr
  -                    , prefPtr
  -                    , XMLElementDecl::AddIfNotFound
  -                    , wasAdded
  -                );
  +                attDef = ((SchemaElementDecl *)elemDecl)->getAttDef(suffPtr, uriId);
               }
   
  -            if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType) {
  +            // now need to prepare for duplicate detection
  +            if(attDef)
  +            {
  +                unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
  +                if(!curCountPtr)
  +                {
  +                    curCountPtr = getNewUIntPtr();
  +                    *curCountPtr = fElemCount;
  +                    fAttDefRegistry->put(attDef, curCountPtr);
  +                }
  +                else if(*curCountPtr < fElemCount)
  +                    *curCountPtr = fElemCount;
  +                else
  +                {
  +                    emitError
  +                    ( 
  +                        XMLErrs::AttrAlreadyUsedInSTag
  +                        , attDef->getFullName()
  +                        , elemDecl->getFullName()
  +                    );
  +                }
  +            }
  +            else
  +            {
  +                unsigned int *curCountPtr = fUndeclaredAttrRegistryNS->get(suffPtr, uriId);
  +                if(!curCountPtr)
  +                {
  +                    curCountPtr = getNewUIntPtr();
  +                    *curCountPtr = fElemCount;
  +                    fUndeclaredAttrRegistryNS->put((void *)suffPtr, uriId, curCountPtr);
  +                }
  +                else if(*curCountPtr < fElemCount)
  +                    *curCountPtr = fElemCount;
  +                else
  +                {
  +                    emitError
  +                    ( 
  +                        XMLErrs::AttrAlreadyUsedInSTag
  +                        , namePtr
  +                        , elemDecl->getFullName()
  +                    );
  +                }
  +            }
  +            if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) {
                   //we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef
                   if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID)
                   {
  @@ -2266,15 +2320,7 @@
                   }
               }
   
  -            if (wasAdded)
  -            {
  -                // This is to tell the Validator that this attribute was
  -                // faulted-in, was not an attribute in the attdef originally
  -                attDef->setCreateReason(XMLAttDef::JustFaultIn);
  -            }
  -
  -            bool errorCondition = fValidate && !attDefForWildCard && 
  -                attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided();
  +            bool errorCondition = fValidate && !attDefForWildCard && !attDef;
               if (errorCondition && !skipThisOne && !laxThisOne)
               {
                   //
  @@ -2300,14 +2346,15 @@
                       , bufMsg.getRawBuffer()
                       , elemDecl->getFullName()
                   );
  -                ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
  +                if(attDef)
  +                    ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
                   if (getPSVIHandler())
                   {
                       // REVISIT:                
                       // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID);
                   }
               }
  -            else if(errorCondition && laxThisOne) {
  +            else if(errorCondition && laxThisOne && attDef) {
                   ((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::NONE);
                   ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::UNKNOWN);
                   if (getPSVIHandler())
  @@ -2319,6 +2366,7 @@
               }
   
   
  +            /**** REVISIT:  excise this dead code
               //  If its already provided, then there are more than one of
               //  this attribute in this start tag, so emit an error.
               if (attDef->getProvided())
  @@ -2341,15 +2389,18 @@
               {
                   attDef->setProvided(true);
               }
  +            *******/
   
               //  Now normalize the raw value since we have the attribute type. We
               //  don't care about the return status here. If it failed, an error
               //  was issued, which is all we care about.
               if (attDefForWildCard) {
  -                ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
  +                if(attDef)
  +                    ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
                   normalizeAttValue
                   (
                       attDefForWildCard
  +                    , namePtr
                       , curPair->getValue()
                       , normBuf
                   );
  @@ -2377,26 +2428,32 @@
                           , false
                           , elemDecl
                       );
  +                    attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
                   }
  +                else // no decl; default DOMTypeInfo to anySimpleType
  +                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
  +
                   // Save the type for later use
                   attType = attDefForWildCard->getType();
   
  -                ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
  +                if(attDef)
  +                    ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
   
                   DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
  -                if(tempDV && tempDV->getType() == DatatypeValidator::Union)
  -                    ((SchemaAttDef*)attDef)->setMembertypeValidator(((UnionDatatypeValidator *)tempDV)->getMemberTypeValidator());
  +                if(tempDV && tempDV->getType() == DatatypeValidator::Union && attDef)
  +                    ((SchemaAttDef*)attDef)->setMembertypeValidator(attrValidator);
               }
               else {
                   normalizeAttValue
                   (
                       attDef
  +                    , namePtr
                       , curPair->getValue()
                       , normBuf
                   );
   
                   //  If we found an attdef for this one, then lets validate it.
  -                if (attDef->getCreateReason() != XMLAttDef::JustFaultIn)
  +                if (attDef)
                   {
                       if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType))
                       {
  @@ -2421,12 +2478,20 @@
                               , false
                               , elemDecl
                           );
  +                        attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
                       }
  +                    else
  +                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
                   }
  +                else 
  +                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
   
                   // Save the type for later use
  -                attType = attDef->getType();
  -                ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
  +                attType = (attDef)?attDef->getType():XMLAttDef::CData;
  +                if(attDef)
  +                {
  +                    ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
  +                } 
               }
           }
           else
  @@ -2435,10 +2500,13 @@
               attType = XMLAttDef::CData;
               normalizeAttRawValue
               (
  -                curPair->getKey()
  +                namePtr
                   , curPair->getValue()
                   , normBuf
               );
  +            if((uriId == fXMLNSNamespaceId)
  +                  || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
  +                attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
           }
   
   
  @@ -2457,6 +2525,8 @@
                   , attType
                   , true
                   , fMemoryManager
  +                , attrValidator
  +                , true
               );
               toFill.addElement(curAttr);
           }
  @@ -2470,6 +2540,8 @@
                   , prefPtr
                   , normBuf.getRawBuffer()
                   , attType
  +                , attrValidator
  +                , true
               );
               curAttr->setSpecified(true);
           }
  @@ -2496,8 +2568,9 @@
               XMLAttDef *curDef = &attDefList.getAttDef(i);            
               const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
   
  -            if (!curDef->getProvided())
  -            {
  +            unsigned int *attCountPtr = fAttDefRegistry->get(curDef);
  +            if (!attCountPtr || *attCountPtr < fElemCount)
  +            { // did not occur
                   ((SchemaAttDef *)curDef)->setValidationAttempted(PSVIDefs::FULL);
                   ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::VALID);
                   if (getPSVIHandler())
  @@ -2507,7 +2580,7 @@
                       // PSVIAttribute->setValidity(PSVIItem::VALIDITY_VALID);
                   }
   
  -                //the attributes is not provided
  +                //the attribute is not provided
                   if (fValidate)
                   {
                       // If we are validating and its required, then an error
  @@ -2546,9 +2619,8 @@
                   }
   
                   //  Fault in the value if needed, and bump the att count.
  -                //  We have to
                   if ((defType == XMLAttDef::Default)
  -                ||  (defType == XMLAttDef::Fixed))
  +                    ||  (defType == XMLAttDef::Fixed))
                   {
                       // Let the validator pass judgement on the attribute value
                       if (fValidate)
  @@ -2582,7 +2654,7 @@
   
                   ((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef);
               }
  -            else
  +            else if (attCountPtr)
               {
                   //attribute is provided
                   // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
  @@ -2617,6 +2689,7 @@
   //  are legal if escaped only. And some escape chars are not subject to
   //  normalization rules.
   bool SGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
  +                                      , const XMLCh* const        attrName
                                         , const XMLCh* const        value
                                         ,       XMLBuffer&          toFill)
   {
  @@ -2628,15 +2701,18 @@
       };
   
       // Get the type and name
  -    const XMLAttDef::AttTypes type = attDef->getType();
  -    const XMLCh* const attrName = attDef->getFullName();
  +    const XMLAttDef::AttTypes type = (attDef)
  +                            ?attDef->getType()
  +                            :XMLAttDef::CData;
   
       // Assume its going to go fine, and empty the target buffer in preperation
       bool retVal = true;
       toFill.reset();
   
       // Get attribute def - to check to see if it's declared externally or not
  -    bool  isAttExternal = attDef->isExternal();
  +    bool  isAttExternal = (attDef)
  +                        ?attDef->isExternal()
  +                        :false;
   
       //  Loop through the chars of the source value and normalize it according
       //  to the type.
  @@ -2676,7 +2752,8 @@
                           // Can't have a standalone document declaration of "yes" if  attribute
                           // values are subject to normalisation
                           fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
  -                        ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);     
  +                        if(attDef)
  +                            ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);     
                           if (getPSVIHandler())
                           {
                               // REVISIT:               
  @@ -2720,7 +2797,8 @@
                               // Can't have a standalone document declaration of "yes" if  attribute
                               // values are subject to normalisation
                               fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
  -                            ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
  +                            if(attDef)
  +                                ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
                               if (getPSVIHandler())
                               {
                                   // REVISIT:                
  @@ -2741,7 +2819,8 @@
           srcPtr++;
       }
   
  -    ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef);
  +    if(attDef)
  +        ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef);
   
       return retVal;
   }
  @@ -2957,6 +3036,20 @@
           fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
           fEntityExpansionCount = 0;
       }
  +    fElemCount = 0;
  +    if(fUIntPoolRowTotal >= 32) 
  +    { // 8 KB tied up with validating attributes...
  +        fAttDefRegistry->removeAll();
  +        fUndeclaredAttrRegistryNS->removeAll();
  +        recreateUIntPool();
  +    }
  +    else
  +    {
  +        // note that this will implicitly reset the values of the hashtables,
  +        // though their buckets will still be tied up
  +        resetUIntPool();
  +    }
  +    
   }
   
   
  
  
  
  1.11      +18 -1     xml-xerces/c/src/xercesc/internal/SGXMLScanner.hpp
  
  Index: SGXMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.hpp,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- SGXMLScanner.hpp	22 Oct 2003 20:22:30 -0000	1.10
  +++ SGXMLScanner.hpp	24 Nov 2003 05:09:38 -0000	1.11
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.11  2003/11/24 05:09:38  neilg
  + * implement new, stateless, method for detecting duplicate attributes
  + *
    * Revision 1.10  2003/10/22 20:22:30  knoaman
    * Prepare for annotation support.
    *
  @@ -207,6 +210,7 @@
       bool normalizeAttValue
       (
           const   XMLAttDef* const    attDef
  +        , const XMLCh* const        attrName 
           , const XMLCh* const        value
           ,       XMLBuffer&          toFill
       );
  @@ -310,6 +314,16 @@
       //      its xpath.
       // fElemNonDeclPool
       //      registry for elements without decls in the grammar
  +    // fElemCount
  +    //      count of the number of start tags seen so far (starts at 1).
  +    //      Used for duplicate attribute detection/processing of required/defaulted attributes
  +    // fAttDefRegistry
  +    //      mapping from XMLAttDef instances to the count of the last
  +    //      start tag where they were utilized.
  +    // fUndeclaredAttrRegistryNS
  +    //      mapping of namespaceId/localName pairs to the count of the last
  +    //      start tag in which they occurred.
  +    //
       //
       // -----------------------------------------------------------------------
       bool                        fSeeXsi;
  @@ -325,6 +339,9 @@
       ValueStoreCache*            fValueStoreCache;
       FieldActivator*             fFieldActivator;
       RefHash3KeysIdPool<SchemaElementDecl>* fElemNonDeclPool;
  +    unsigned int                            fElemCount;
  +    RefHashTableOf<unsigned int>*           fAttDefRegistry;
  +    RefHash2KeysTableOf<unsigned int>*      fUndeclaredAttrRegistryNS;
   };
   
   inline const XMLCh* SGXMLScanner::getName() const
  
  
  
  1.33      +26 -2     xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp
  
  Index: IGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v
  retrieving revision 1.32
  retrieving revision 1.33
  diff -u -r1.32 -r1.33
  --- IGXMLScanner.cpp	21 Nov 2003 22:38:19 -0000	1.32
  +++ IGXMLScanner.cpp	24 Nov 2003 05:09:38 -0000	1.33
  @@ -64,6 +64,7 @@
   // ---------------------------------------------------------------------------
   #include <xercesc/internal/IGXMLScanner.hpp>
   #include <xercesc/util/RuntimeException.hpp>
  +#include <xercesc/util/HashPtr.hpp>
   #include <xercesc/util/UnexpectedEOFException.hpp>
   #include <xercesc/sax/InputSource.hpp>
   #include <xercesc/framework/XMLDocumentHandler.hpp>
  @@ -108,6 +109,10 @@
       , fFieldActivator(0)
       , fDTDElemNonDeclPool(0)
       , fSchemaElemNonDeclPool(0)
  +    , fElemCount(0)
  +    , fAttDefRegistry(0)
  +    , fUndeclaredAttrRegistry(0)
  +    , fUndeclaredAttrRegistryNS(0)
   {
       try
       {
  @@ -150,6 +155,10 @@
       , fFieldActivator(0)
       , fDTDElemNonDeclPool(0)
       , fSchemaElemNonDeclPool(0)
  +    , fElemCount(0)
  +    , fAttDefRegistry(0)
  +    , fUndeclaredAttrRegistry(0)
  +    , fUndeclaredAttrRegistryNS(0)
   {
       try
       {	
  @@ -554,7 +563,19 @@
       fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
       // create pools for undeclared elements
       fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
  -    fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
  +    fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager); 
  +    fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashPtr(), fMemoryManager
  +    );
  +    fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
  +    );
  +    fUndeclaredAttrRegistryNS = new (fMemoryManager) RefHash2KeysTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
  +    );
   }
   
   void IGXMLScanner::cleanUp()
  @@ -569,6 +590,9 @@
       delete fLocationPairs;
       delete fDTDElemNonDeclPool;
       delete fSchemaElemNonDeclPool;
  +    delete fAttDefRegistry;
  +    delete fUndeclaredAttrRegistry;
  +    delete fUndeclaredAttrRegistryNS;
   }
   
   // ---------------------------------------------------------------------------
  
  
  
  1.12      +20 -1     xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp
  
  Index: IGXMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- IGXMLScanner.hpp	22 Oct 2003 20:22:30 -0000	1.11
  +++ IGXMLScanner.hpp	24 Nov 2003 05:09:38 -0000	1.12
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.12  2003/11/24 05:09:38  neilg
  + * implement new, stateless, method for detecting duplicate attributes
  + *
    * Revision 1.11  2003/10/22 20:22:30  knoaman
    * Prepare for annotation support.
    *
  @@ -213,6 +216,7 @@
       bool normalizeAttValue
       (
           const   XMLAttDef* const    attDef
  +        , const XMLCh* const       name 
           , const XMLCh* const        value
           ,       XMLBuffer&          toFill
       );
  @@ -324,6 +328,17 @@
       //      registry of "faulted-in" DTD element decls
       // fSchemaElemNonDeclPool
       //      registry for elements without decls in the grammar
  +    // fElemCount
  +    //      count of start tags seen so far (reset to 0; incremented per start tag, so the first sees 1).
  +    //      Used for duplicate attribute detection/processing of required/defaulted attributes
  +    // fAttDefRegistry
  +    //      mapping from XMLAttDef instances to the count of the last
  +    //      start tag where they were utilized.
  +    // fUndeclaredAttrRegistry
  +    //      mapping of attr QNames to the count of the last start tag in which they occurred
  +    // fUndeclaredAttrRegistryNS
  +    //      mapping of namespaceId/localName pairs to the count of the last
  +    //      start tag in which they occurred.
       //
       // -----------------------------------------------------------------------
       bool                        fSeeXsi;
  @@ -341,6 +356,10 @@
       ValueVectorOf<XMLCh*>*      fLocationPairs;
       NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool;
       RefHash3KeysIdPool<SchemaElementDecl>* fSchemaElemNonDeclPool;
  +    unsigned int                            fElemCount;
  +    RefHashTableOf<unsigned int>*           fAttDefRegistry;
  +    RefHashTableOf<unsigned int>*           fUndeclaredAttrRegistry;
  +    RefHash2KeysTableOf<unsigned int>*      fUndeclaredAttrRegistryNS;
   };
   
   inline const XMLCh* IGXMLScanner::getName() const
  
  
  
  1.42      +157 -53   xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
  
  Index: IGXMLScanner2.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
  retrieving revision 1.41
  retrieving revision 1.42
  diff -u -r1.41 -r1.42
  --- IGXMLScanner2.cpp	21 Nov 2003 22:38:19 -0000	1.41
  +++ IGXMLScanner2.cpp	24 Nov 2003 05:09:38 -0000	1.42
  @@ -119,7 +119,8 @@
       //  that it owns, and to return us a boolean indicating whether it has
       //  any defs.  If schemas are being validated, the complexType
       // at the top of the SchemaValidator's stack will
  -    // know what's best.  REVISIT:  don't modify grammar at all...
  +    // know what's best.  REVISIT:  don't modify grammar at all; eliminate
  +    // this step...
       ComplexTypeInfo *currType = 0;
       if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
           currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
  @@ -127,6 +128,9 @@
               ? currType->resetDefs()
               : elemDecl->resetDefs();
   
  +    // another set of attributes; increment element counter
  +    fElemCount++;
  +
       //  If there are no expliclitily provided attributes and there are no
       //  defined attributes for the element, the we don't have anything to do.
       //  So just return zero in this case.
  @@ -204,6 +208,7 @@
           //  If its not a special case namespace attr of some sort, then we
           //  do normal checking and processing.
           XMLAttDef::AttTypes attType;
  +        DatatypeValidator *attrValidator = 0;
           if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType)
           {
               // Some checking for attribute wild card first (for schema)
  @@ -295,20 +300,82 @@
               //  Find this attribute within the parent element. We pass both
               //  the uriID/name and the raw QName buffer, since we don't know
               //  how the derived validator and its elements store attributes.
  -            bool wasAdded = false;
               if (!attDef) {
  -                attDef = elemDecl->findAttr
  -                (
  -                    curPair->getKey()
  -                    , uriId
  -                    , suffPtr
  -                    , prefPtr
  -                    , XMLElementDecl::AddIfNotFound
  -                    , wasAdded
  -                );
  +                if(fGrammarType == Grammar::SchemaGrammarType) 
  +                    attDef = ((SchemaElementDecl *)elemDecl)->getAttDef( suffPtr , uriId);
  +                else 
  +                    attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr);
  +            } 
  +
  +            // now need to prepare for duplicate detection
  +            if(attDef)
  +            {
  +                unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
  +                if(!curCountPtr)
  +                {
  +                    curCountPtr = getNewUIntPtr();
  +                    *curCountPtr = fElemCount;
  +                    fAttDefRegistry->put(attDef, curCountPtr);
  +                }
  +                else if(*curCountPtr < fElemCount)
  +                    *curCountPtr = fElemCount;
  +                else
  +                {
  +                    emitError
  +                    ( 
  +                        XMLErrs::AttrAlreadyUsedInSTag
  +                        , attDef->getFullName()
  +                        , elemDecl->getFullName()
  +                    );
  +                }
  +            }
  +            else
  +            {
  +                if(fGrammarType == Grammar::DTDGrammarType) 
  +                {
  +                    unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
  +                    if(!curCountPtr)
  +                    {
  +                        curCountPtr = getNewUIntPtr();
  +                        *curCountPtr = fElemCount;
  +                        fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
  +                    }
  +                    else if(*curCountPtr < fElemCount)
  +                        *curCountPtr = fElemCount;
  +                    else
  +                    {
  +                        emitError
  +                        ( 
  +                            XMLErrs::AttrAlreadyUsedInSTag
  +                            , namePtr
  +                            , elemDecl->getFullName()
  +                        );
  +                    }
  +                }
  +                else // schema grammar
  +                {
  +                    unsigned int *curCountPtr = fUndeclaredAttrRegistryNS->get(suffPtr, uriId);
  +                    if(!curCountPtr)
  +                    {
  +                        curCountPtr = getNewUIntPtr();
  +                        *curCountPtr = fElemCount;
  +                        fUndeclaredAttrRegistryNS->put((void *)suffPtr, uriId, curCountPtr);
  +                    }
  +                    else if(*curCountPtr < fElemCount)
  +                        *curCountPtr = fElemCount;
  +                    else
  +                    {
  +                        emitError
  +                        ( 
  +                            XMLErrs::AttrAlreadyUsedInSTag
  +                            , namePtr
  +                            , elemDecl->getFullName()
  +                        );
  +                    }
  +                }
               }
   
  -            if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType) {
  +            if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) {
                   //we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef
                   if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID)             
                       ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID);
  @@ -326,15 +393,7 @@
                   }
               }
   
  -            if (wasAdded)
  -            {
  -                // This is to tell the Validator that this attribute was
  -                // faulted-in, was not an attribute in the attdef originally
  -                attDef->setCreateReason(XMLAttDef::JustFaultIn);
  -            }
  -
  -            bool errorCondition = fValidate && !attDefForWildCard && 
  -                attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided();
  +            bool errorCondition = fValidate && !attDefForWildCard && !attDef;
               if (errorCondition && !skipThisOne && !laxThisOne)
               {
                   //
  @@ -360,7 +419,7 @@
                       , bufMsg.getRawBuffer()
                       , elemDecl->getFullName()
                   );
  -                if(fGrammarType == Grammar::SchemaGrammarType) {
  +                if(fGrammarType == Grammar::SchemaGrammarType && attDef) {
                       ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
                       if (getPSVIHandler())
                       {
  @@ -369,7 +428,7 @@
                       }
                   }
               }
  -            else if(errorCondition && laxThisOne && fGrammarType == Grammar::SchemaGrammarType) {
  +            else if(errorCondition && laxThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) {
                   ((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::NONE);
                   ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::UNKNOWN);
                   if (getPSVIHandler())
  @@ -381,6 +440,7 @@
               }
   
   
  +            /**** REVISIT:  excise this dead code
               //  If its already provided, then there are more than one of
               //  this attribute in this start tag, so emit an error.
               if (attDef->getProvided())
  @@ -404,15 +464,18 @@
               {
                   attDef->setProvided(true);
               }
  +            ********/
   
               //  Now normalize the raw value since we have the attribute type. We
               //  don't care about the return status here. If it failed, an error
               //  was issued, which is all we care about.
               if (attDefForWildCard) {
  -                ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
  +                if(attDef)
  +                    ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
                   normalizeAttValue
                   (
                       attDefForWildCard
  +                    , namePtr
                       , curPair->getValue()
                       , normBuf
                   );
  @@ -440,28 +503,33 @@
                           , false
                           , elemDecl
                       );
  +                    attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
                   }
  +                else // no decl; default DOMTypeInfo to anySimpleType
  +                    attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
   
                   // Save the type for later use
                   attType = attDefForWildCard->getType();
  -                if(fGrammarType == Grammar::SchemaGrammarType) {
  +                if(fGrammarType == Grammar::SchemaGrammarType && attDef) 
  +                {
                       ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
   
                       DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
  -                    if(tempDV && tempDV->getType() == DatatypeValidator::Union)
  -                        ((SchemaAttDef*)attDef)->setMembertypeValidator(((UnionDatatypeValidator *)tempDV)->getMemberTypeValidator());
  +                    if(tempDV && tempDV->getType() == DatatypeValidator::Union )
  +                        ((SchemaAttDef*)attDef)->setMembertypeValidator(attrValidator);
                   }
               }
               else {
                   normalizeAttValue
                   (
                       attDef
  +                    , namePtr
                       , curPair->getValue()
                       , normBuf
                   );
   
                   //  If we found an attdef for this one, then lets validate it.
  -                if (attDef->getCreateReason() != XMLAttDef::JustFaultIn)
  +                if (attDef)
                   {
                       if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType))
                       {
  @@ -486,15 +554,25 @@
                               , false
                               , elemDecl
                           );
  +                        attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
                       }
  +                    else if(fGrammarType == Grammar::SchemaGrammarType)
  +                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
  +                }
  +                else // no attDef at all; default to anySimpleType
  +                {
  +                    if(fGrammarType == Grammar::SchemaGrammarType) 
  +                        attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
                   }
   
                   // Save the type for later use
  -                attType = attDef->getType();
  +                attType = (attDef)?attDef->getType():XMLAttDef::CData;
   
   
  -                if(fGrammarType == Grammar::SchemaGrammarType)
  +                if(fGrammarType == Grammar::SchemaGrammarType && attDef)
  +                {
                       ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
  +                }
   
   
               }
  @@ -505,10 +583,13 @@
               attType = XMLAttDef::CData;
               normalizeAttRawValue
               (
  -                curPair->getKey()
  +                namePtr
                   , curPair->getValue()
                   , normBuf
               );
  +            if((uriId == fXMLNSNamespaceId)
  +                  || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
  +                attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
           }
   
           //  Add this attribute to the attribute list that we use to pass them
  @@ -545,6 +626,8 @@
                   , attType
                   , true
                   , fMemoryManager
  +                , attrValidator
  +                , (fGrammarType == Grammar::SchemaGrammarType )
               );
               toFill.addElement(curAttr);
           }
  @@ -558,10 +641,12 @@
                   , prefPtr
                   , normBuf.getRawBuffer()
                   , attType
  +                , attrValidator
  +                , (fGrammarType == Grammar::SchemaGrammarType )
               );
               curAttr->setSpecified(true);
           }
  -
  +            
           // Bump the count of attrs in the list
           retCount++;
       }
  @@ -584,9 +669,9 @@
               // Get the current att def, for convenience and its def type
               const XMLAttDef *curDef = &attDefList.getAttDef(i);
               const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
  -
  -            if (!curDef->getProvided())
  -            {
  +            unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef);
  +            if (!attCountPtr || *attCountPtr < fElemCount)
  +            { // did not occur
                   if(fGrammarType == Grammar::SchemaGrammarType) {
                       ((SchemaAttDef *)curDef)->setValidationAttempted(PSVIDefs::FULL);
                       ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::VALID);
  @@ -598,7 +683,7 @@
                       }
                   }
   
  -                //the attributes is not provided
  +                //the attribute is not provided
                   if (fValidate)
                   {
                       // If we are validating and its required, then an error
  @@ -622,7 +707,7 @@
                           }
                       }
                       else if ((defType == XMLAttDef::Default) ||
  -                             (defType == XMLAttDef::Fixed)  )
  +                            (defType == XMLAttDef::Fixed)  )
                       {
                           if (fStandalone && curDef->isExternal())
                           {
  @@ -643,9 +728,8 @@
                   }
   
                   //  Fault in the value if needed, and bump the att count.
  -                //  We have to
                   if ((defType == XMLAttDef::Default)
  -                ||  (defType == XMLAttDef::Fixed))
  +                    ||  (defType == XMLAttDef::Fixed))
                   {
                       // Let the validator pass judgement on the attribute value
                       if (fValidate)
  @@ -691,7 +775,7 @@
                       ((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef);
   
               }
  -            else
  +            else if(attCountPtr)
               {
                   //attribute is provided
                   // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
  @@ -728,6 +812,7 @@
   //  are legal if escaped only. And some escape chars are not subject to
   //  normalization rules.
   bool IGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
  +                                      , const XMLCh* const        attName
                                         , const XMLCh* const        value
                                         ,       XMLBuffer&          toFill)
   {
  @@ -739,14 +824,18 @@
       };
   
       // Get the type and name
  -    const XMLAttDef::AttTypes type = attDef->getType();
  +    const XMLAttDef::AttTypes type = (attDef)
  +                    ?attDef->getType()
  +                    :XMLAttDef::CData;
   
       // Assume its going to go fine, and empty the target buffer in preperation
       bool retVal = true;
       toFill.reset();
   
       // Get attribute def - to check to see if it's declared externally or not
  -    bool  isAttExternal = attDef->isExternal();
  +    bool  isAttExternal = (attDef)
  +                ?attDef->isExternal()
  +                :false;
   
       //  Loop through the chars of the source value and normalize it according
       //  to the type.
  @@ -769,7 +858,7 @@
           //  not allowed in attribute values.
           if (!escaped && (*srcPtr == chOpenAngle))
           {
  -            emitError(XMLErrs::BracketInAttrValue, attDef->getFullName());
  +            emitError(XMLErrs::BracketInAttrValue, attName);
               retVal = false;
           }
   
  @@ -783,17 +872,18 @@
                       // XML 1.0, Section 2.9
                       if (fStandalone && fValidate && isAttExternal)
                       {
  -                        // Can't have a standalone document declaration of "yes" if  attribute
  -                        // values are subject to normalisation
  -                        fValidator->emitError(XMLValid::NoAttNormForStandalone, attDef->getFullName());
  -                        if(fGrammarType == Grammar::SchemaGrammarType) {
  -                            ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
  +                         // Can't have a standalone document declaration of "yes" if  attribute
  +                         // values are subject to normalisation
  +                         fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
  +                         if(fGrammarType == Grammar::SchemaGrammarType && attDef) {
  +                             ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
                               if (getPSVIHandler())
                               {
                                   // REVISIT:                                   
                                   // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID); 
                               }
  -                        }
  +                         }
  +
                       }
                       nextCh = chSpace;
                   }
  @@ -831,8 +921,8 @@
                           {
                               // Can't have a standalone document declaration of "yes" if  attribute
                               // values are subject to normalisation
  -                            fValidator->emitError(XMLValid::NoAttNormForStandalone, attDef->getFullName());
  -                            if(fGrammarType == Grammar::SchemaGrammarType) {
  +                            fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
  +                            if(fGrammarType == Grammar::SchemaGrammarType && attDef) {
                                   ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
                                   if (getPSVIHandler())
                                   {
  @@ -855,7 +945,7 @@
           srcPtr++;
       }
   
  -    if(fGrammarType == Grammar::SchemaGrammarType)
  +    if(fGrammarType == Grammar::SchemaGrammarType && attDef)
           ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef);
   
       return retVal;
  @@ -1093,6 +1183,20 @@
       {
           fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
           fEntityExpansionCount = 0;
  +    }
  +    fElemCount = 0;
  +    if(fUIntPoolRowTotal >= 32) 
  +    { // 8 KB tied up with validating attributes...
  +        fAttDefRegistry->removeAll();
  +        fUndeclaredAttrRegistry->removeAll();
  +        fUndeclaredAttrRegistryNS->removeAll();
  +        recreateUIntPool();
  +    }
  +    else
  +    {
  +        // note that this will implicitly reset the values of the hashtables,
  +        // though their buckets will still be tied up
  +        resetUIntPool();
       }
   }
   
  
  
  
  1.31      +94 -57    xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
  
  Index: DGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
  retrieving revision 1.30
  retrieving revision 1.31
  diff -u -r1.30 -r1.31
  --- DGXMLScanner.cpp	21 Nov 2003 14:46:56 -0000	1.30
  +++ DGXMLScanner.cpp	24 Nov 2003 05:09:38 -0000	1.31
  @@ -80,6 +80,7 @@
   #include <xercesc/validators/DTD/DTDValidator.hpp>
   #include <xercesc/util/OutOfMemoryException.hpp>
   #include <xercesc/util/XMLResourceIdentifier.hpp>
  +#include <xercesc/util/HashPtr.hpp>
   
   XERCES_CPP_NAMESPACE_BEGIN
   
  @@ -95,6 +96,9 @@
       , fDTDValidator(0)
       , fDTDGrammar(0)
       , fDTDElemNonDeclPool(0)
  +    , fElemCount(0)
  +    , fAttDefRegistry(0)
  +    , fUndeclaredAttrRegistry(0)
   {
       try
       {
  @@ -134,6 +138,9 @@
       , fDTDValidator(0)
       , fDTDGrammar(0)
       , fDTDElemNonDeclPool(0)
  +    , fElemCount(0)
  +    , fAttDefRegistry(0)
  +    , fUndeclaredAttrRegistry(0)
   {
       try
       {	
  @@ -1169,6 +1176,9 @@
       //  pairs until we get there.
       unsigned int    attCount = 0;
       unsigned int    curAttListSize = fAttrList->size();
  +    wasAdded = false;
  +    fElemCount++;
  +
       while (true)
       {
           // And get the next non-space character
  @@ -1253,37 +1263,57 @@
               //  See if this attribute is declared for this element. If we are
               //  not validating of course it will not be at first, but we will
               //  fault it into the pool (to avoid lots of redundant errors.)
  -            wasAdded = false;
  -            XMLAttDef* attDef = elemDecl->findAttr
  -            (
  -                fAttNameBuf.getRawBuffer()
  -                , 0
  -                , 0
  -                , 0
  -                , XMLElementDecl::AddIfNotFound
  -                , wasAdded
  -            );
  +            XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer());
   
  -            if (fValidate)
  +            // now need to prepare for duplicate detection
  +            if(attDef)
               {
  -                if (wasAdded)
  +                unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
  +                if(!curCountPtr)
                   {
  -                    // This is to tell the Validator that this attribute was
  -                    // faulted-in, was not an attribute in the attdef originally
  -                    attDef->setCreateReason(XMLAttDef::JustFaultIn);
  -
  -                    fValidator->emitError
  -                    (
  -                        XMLValid::AttNotDefinedForElement
  -                        , fAttNameBuf.getRawBuffer()
  -                        , qnameRawBuf
  +                    curCountPtr = getNewUIntPtr();
  +                    *curCountPtr = fElemCount;
  +                    fAttDefRegistry->put(attDef, curCountPtr);
  +                }
  +                else if(*curCountPtr < fElemCount)
  +                    *curCountPtr = fElemCount;
  +                else
  +                {
  +                    emitError
  +                    ( 
  +                        XMLErrs::AttrAlreadyUsedInSTag
  +                        , attDef->getFullName()
  +                        , elemDecl->getFullName()
  +                    );
  +                }
  +            }
  +            else
  +            {
  +                XMLCh * namePtr = fAttNameBuf.getRawBuffer();
  +                unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
  +                if(!curCountPtr)
  +                {
  +                    curCountPtr = getNewUIntPtr();
  +                     *curCountPtr = fElemCount;
  +                    fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
  +                }
  +                else if(*curCountPtr < fElemCount)
  +                    *curCountPtr = fElemCount;
  +                else
  +                {
  +                    emitError
  +                    ( 
  +                        XMLErrs::AttrAlreadyUsedInSTag
  +                        , namePtr
  +                        , elemDecl->getFullName()
                       );
                   }
  -                // If this attribute was faulted-in and first occurence,
  -                // then emit an error
  -                else if (attDef->getCreateReason() == XMLAttDef::JustFaultIn
  -                         && !attDef->getProvided())
  +            }
  +            if (fValidate)
  +            {
  +                if (!attDef)
                   {
  +
                       fValidator->emitError
                       (
                           XMLValid::AttNotDefinedForElement
  @@ -1293,28 +1323,11 @@
                   }
               }
   
  -            //  If its already provided, then there are more than one of
  -            //  this attribute in this start tag, so emit an error.
  -            if (attDef->getProvided())
  -            {
  -                emitError
  -                (
  -                    XMLErrs::AttrAlreadyUsedInSTag
  -                    , attDef->getFullName()
  -                    , qnameRawBuf
  -                );
  -            }
  -            else
  -            {
  -                // Mark this one as already seen
  -                attDef->setProvided(true);
  -            }
  -
               //  Skip any whitespace before the value and then scan the att
               //  value. This will come back normalized with entity refs and
               //  char refs expanded.
               fReaderMgr.skipPastSpaces();
  -            if (!scanAttValue(attDef, fAttValueBuf))
  +            if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf))
               {
                   static const XMLCh tmpList[] =
                   {
  @@ -1352,7 +1365,7 @@
               //  determine if it has a valid value. It will output any needed
               //  errors, but we just keep going. We only need to do this if
               //  we are validating.
  -            if (!wasAdded && attDef->getCreateReason() != XMLAttDef::JustFaultIn)
  +            if (attDef)
               {
                   // Let the validator pass judgement on the attribute value
                   if (fValidate)
  @@ -1403,7 +1416,7 @@
                           fEmptyNamespaceId
                           , fAttNameBuf.getRawBuffer()
                           , fAttValueBuf.getRawBuffer()
  -                        , attDef->getType()
  +                        , (attDef)?attDef->getType():XMLAttDef::CData
                           , true
                           , fMemoryManager
                       );
  @@ -1416,7 +1429,7 @@
                           , fAttNameBuf.getRawBuffer()
                           , XMLUni::fgZeroLenString
                           , fAttValueBuf.getRawBuffer()
  -                        , attDef->getType()
  +                        , (attDef)?attDef->getType():XMLAttDef::CData
                           , true
                           , fMemoryManager
                       );
  @@ -1434,7 +1447,7 @@
                           fEmptyNamespaceId
                           , fAttNameBuf.getRawBuffer()
                           , fAttValueBuf.getRawBuffer()
  -                        , attDef->getType()
  +                        , (attDef)?attDef->getType():XMLAttDef::CData
                       );
                   }
                   else
  @@ -1445,7 +1458,7 @@
                           , fAttNameBuf.getRawBuffer()
                           , XMLUni::fgZeroLenString
                           , fAttValueBuf.getRawBuffer()
  -                        , attDef->getType()
  +                        , (attDef)?attDef->getType():XMLAttDef::CData
                       );
                   }
                   curAtt->setSpecified(true);
  @@ -1870,6 +1883,14 @@
       fDTDValidator = new (fMemoryManager) DTDValidator();
       initValidator(fDTDValidator);
       fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
  +    fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashPtr(), fMemoryManager
  +    );
  +    fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  +    (
  +        509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
  +    );
   }
   
   void DGXMLScanner::cleanUp()
  @@ -1877,6 +1898,8 @@
       delete fAttrNSList;
       delete fDTDValidator;
       delete fDTDElemNonDeclPool;
  +    delete fAttDefRegistry;
  +    delete fUndeclaredAttrRegistry;
   }
   
   
  @@ -1921,8 +1944,9 @@
               // Get the current att def, for convenience and its def type
               XMLAttDef& curDef = attDefList.getAttDef(i);
   
  -            if (!curDef.getProvided() && curDef.getCreateReason() != XMLAttDef::JustFaultIn)
  -            {
  +            unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
  +            if (!attCountPtr || *attCountPtr < fElemCount)
  +            { // did not occur
                   const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
   
                   if (fValidate)
  @@ -2039,9 +2063,6 @@
                       retCount++;
                   }
               }
  -            else {
  -                curDef.setProvided(false);
  -            }
           }
       }
   
  @@ -2182,6 +2203,18 @@
           fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
           fEntityExpansionCount = 0;
       }
  +    if(fUIntPoolRowTotal >= 32) 
  +    { // 8 KB tied up with validating attributes...
  +        fAttDefRegistry->removeAll();
  +        fUndeclaredAttrRegistry->removeAll();
  +        recreateUIntPool();
  +    }
  +    else
  +    {
  +        // note that this will implicitly reset the values of the hashtables,
  +        // though their buckets will still be tied up
  +        resetUIntPool();
  +    }
   }
   
   
  @@ -2440,6 +2473,7 @@
   //  DGXMLScanner: Private parsing methods
   // ---------------------------------------------------------------------------
   bool DGXMLScanner::scanAttValue(  const   XMLAttDef* const    attDef
  +                                  , const XMLCh *const attrName
                                     ,       XMLBuffer&          toFill)
   {
       enum States
  @@ -2449,8 +2483,9 @@
       };
   
       // Get the type and name
  -    const XMLAttDef::AttTypes type = attDef->getType();
  -    const XMLCh* const attrName = attDef->getFullName();
  +    const XMLAttDef::AttTypes type = (attDef)
  +                        ?attDef->getType()
  +                        :XMLAttDef::CData;
   
       // Reset the target buffer
       toFill.reset();
  @@ -2465,7 +2500,9 @@
       const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
   
       // Get attribute def - to check to see if it's declared externally or not
  -    bool  isAttExternal = attDef->isExternal();
  +    bool  isAttExternal = (attDef)
  +                        ?attDef->isExternal()
  +                        :false;
   
       //  Loop until we get the attribute value. Note that we use a double
       //  loop here to avoid the setup/teardown overhead of the exception
  
  
  
  1.11      +16 -1     xml-xerces/c/src/xercesc/internal/DGXMLScanner.hpp
  
  Index: DGXMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.hpp,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- DGXMLScanner.hpp	22 Oct 2003 20:22:30 -0000	1.10
  +++ DGXMLScanner.hpp	24 Nov 2003 05:09:39 -0000	1.11
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.11  2003/11/24 05:09:39  neilg
  + * implement new, stateless, method for detecting duplicate attributes
  + *
    * Revision 1.10  2003/10/22 20:22:30  knoaman
    * Prepare for annotation support.
    *
  @@ -210,6 +213,7 @@
       bool scanAttValue
       (
           const   XMLAttDef* const    attDef
  +        , const XMLCh *const        attrName
           ,       XMLBuffer&          toFill
       );
       bool scanContent(const bool extEntity);
  @@ -240,12 +244,23 @@
       //
       // fDTDElemNonDeclPool
       //     registry of "faulted-in" DTD element decls
  +    // fElemCount
  +    //      count of the number of start tags seen so far (starts at 1).
  +    //      Used for duplicate attribute detection/processing of required/defaulted attributes
  +    // fAttDefRegistry
  +    //      mapping from XMLAttDef instances to the count of the last
  +    //      start tag where they were utilized.
  +    // fUndeclaredAttrRegistry
  +    //      mapping of attr QNames to the count of the last start tag in which they occurred
       //
       // -----------------------------------------------------------------------
       ValueVectorOf<XMLAttr*>*    fAttrNSList;
       DTDValidator*               fDTDValidator;
       DTDGrammar*                 fDTDGrammar;
       NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool;
  +    unsigned int                fElemCount;
  +    RefHashTableOf<unsigned int>* fAttDefRegistry;
  +    RefHashTableOf<unsigned int>* fUndeclaredAttrRegistry;
   };
   
   inline const XMLCh* DGXMLScanner::getName() const
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org