You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2003/11/24 06:09:39 UTC
cvs commit: xml-xerces/c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp SGXMLScanner.cpp SGXMLScanner.hpp IGXMLScanner.cpp IGXMLScanner.hpp IGXMLScanner2.cpp DGXMLScanner.cpp DGXMLScanner.hpp
neilg 2003/11/23 21:09:39
Modified: c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp
SGXMLScanner.cpp SGXMLScanner.hpp IGXMLScanner.cpp
IGXMLScanner.hpp IGXMLScanner2.cpp DGXMLScanner.cpp
DGXMLScanner.hpp
Log:
implement new, stateless, method for detecting duplicate attributes
Revision Changes Path
1.54 +82 -1 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
Index: XMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
retrieving revision 1.53
retrieving revision 1.54
diff -u -r1.53 -r1.54
--- XMLScanner.cpp 13 Nov 2003 15:00:44 -0000 1.53
+++ XMLScanner.cpp 24 Nov 2003 05:09:38 -0000 1.54
@@ -224,6 +224,10 @@
, fPrefixBuf(1023, manager)
, fURIBuf(1023, manager)
, fElemStack(manager)
+ , fUIntPool(0)
+ , fUIntPoolRow(0)
+ , fUIntPoolCol(0)
+ , fUIntPoolRowTotal(2)
{
commonInit();
@@ -295,6 +299,10 @@
, fPrefixBuf(1023, manager)
, fURIBuf(1023, manager)
, fElemStack(manager)
+ , fUIntPool(0)
+ , fUIntPoolRow(0)
+ , fUIntPoolCol(0)
+ , fUIntPoolRowTotal(2)
{
commonInit();
@@ -311,6 +319,12 @@
fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
fMemoryManager->deallocate(fExternalNoNamespaceSchemaLocation);//delete [] fExternalNoNamespaceSchemaLocation;
+ // delete fUIntPool
+ for (unsigned int i=0; i<=fUIntPoolRow; i++)
+ {
+ fMemoryManager->deallocate(fUIntPool[i]);
+ }
+ fMemoryManager->deallocate(fUIntPool);
}
@@ -710,6 +724,8 @@
setExternalSchemaLocation(refScanner->getExternalSchemaLocation());
setExternalNoNamespaceSchemaLocation(refScanner->getExternalNoNamespaceSchemaLocation());
setValidationScheme(refScanner->getValidationScheme());
+ setSecurityManager(refScanner->getSecurityManager());
+ setPSVIHandler(refScanner->getPSVIHandler());
}
// ---------------------------------------------------------------------------
@@ -740,6 +756,12 @@
// Create the GrammarResolver
//fGrammarResolver = new GrammarResolver();
+
+ // create initial, 64-element, fUIntPool
+ fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) *fUIntPoolRowTotal);
+ fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
+ memset(fUIntPool[0], 0, sizeof(unsigned int) << 6);
+ fUIntPool[1] = 0;
}
@@ -2126,6 +2148,65 @@
{
fReaderMgr.getUpToCharOrWS(toFill, chEndChar);
return toFill.getLen();
+}
+
+unsigned int *XMLScanner::getNewUIntPtr()
+{
+ // Hands back a pointer to the next unused unsigned int in fUIntPool. Each row holds 64 entries (sizeof(unsigned int) << 6 bytes) and is zero-filled when it is allocated.
+ unsigned int *retVal;
+ if(fUIntPoolCol < 64) // fast path: the current row still has a free slot
+ {
+ retVal = fUIntPool[fUIntPoolRow]+fUIntPoolCol;
+ fUIntPoolCol++;
+ return retVal;
+ }
+ // the current row is exhausted: time to grow the pool...
+ if(fUIntPoolRow+1 == fUIntPoolRowTotal) // the array of row pointers is full as well
+ {
+ // and time to add some space for new rows: double the row-pointer array and carry the rows in use across
+ fUIntPoolRowTotal <<= 1;
+ unsigned int **newArray = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal );
+ memcpy(newArray, fUIntPool, (fUIntPoolRow+1) * sizeof(unsigned int *)); // only rows 0..fUIntPoolRow hold live pointers
+ fMemoryManager->deallocate(fUIntPool); // frees the old pointer array only; the rows themselves are kept
+ fUIntPool = newArray;
+ // zero the new row pointers that stay unused; index fUIntPoolRow+1 is skipped because it is assigned just below
+ for (unsigned int i=fUIntPoolRow+2; i<fUIntPoolRowTotal; i++)
+ fUIntPool[i] = 0;
+ }
+ // now to add a new row; we just made sure we have space
+ fUIntPoolRow++;
+ fUIntPool[fUIntPoolRow] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
+ memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6);
+ // element 0 of the new row is the one returned, so the next free column is 1
+ fUIntPoolCol = 1;
+ return fUIntPool[fUIntPoolRow];
+}
+
+void XMLScanner::resetUIntPool()
+{
+ // to reuse the unsigned int pool--and the hashtables that use it--
+ // simply reinitialize everything to 0's; fUIntPoolRow and fUIntPoolCol are left untouched, so no row is freed and slots already handed out keep their addresses
+ for(unsigned int i = 0; i<= fUIntPoolRow; i++) // rows 0..fUIntPoolRow are the ones allocated so far
+ memset(fUIntPool[i], 0, sizeof(unsigned int) << 6);
+}
+
+void XMLScanner::recreateUIntPool()
+{
+ // this allows a bloated unsigned int pool to be dispensed with: every row is deallocated here, so pointers handed out earlier by getNewUIntPtr() must not be used afterwards
+
+ // first, delete old fUIntPool (each allocated row, then the array of row pointers)
+ for (unsigned int i=0; i<=fUIntPoolRow; i++)
+ {
+ fMemoryManager->deallocate(fUIntPool[i]);
+ }
+ fMemoryManager->deallocate(fUIntPool);
+
+ fUIntPoolRow = fUIntPoolCol = 0; // rewind the cursor to the first slot of the first row
+ fUIntPoolRowTotal = 2; // back to the initial two row pointers
+ fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal);
+ fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6); // one row of 64 unsigned ints
+ memset(fUIntPool[fUIntPoolRow], 0, sizeof(unsigned int) << 6); // fUIntPoolRow is 0 here, so this zero-fills the row just allocated
+ fUIntPool[1] = 0; // second row is not allocated yet
}
XERCES_CPP_NAMESPACE_END
1.28 +22 -2 xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
Index: XMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- XMLScanner.hpp 13 Nov 2003 15:00:44 -0000 1.27
+++ XMLScanner.hpp 24 Nov 2003 05:09:38 -0000 1.28
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.28 2003/11/24 05:09:38 neilg
+ * implement new, stateless, method for detecting duplicate attributes
+ *
* Revision 1.27 2003/11/13 15:00:44 peiyongz
* Solve Compilation/Linkage error on AIX/Solaris/HP/Linux
*
@@ -705,6 +708,9 @@
XMLTokens senseNextToken(unsigned int& orgReader);
void initValidator(XMLValidator* theValidator);
inline void resetValidationContext();
+ unsigned int *getNewUIntPtr();
+ void resetUIntPool();
+ void recreateUIntPool();
// -----------------------------------------------------------------------
// Data members
@@ -714,7 +720,7 @@
// the document handler the attributes found. To make it more
// efficient we keep this ref vector of XMLAttr objects around. We
// just reuse it over and over, allowing it to grow to meet the
- // peek need.
+ // peak need.
//
// fBufMgr
// This is a manager for temporary buffers used during scanning.
@@ -909,6 +915,15 @@
//
// fXMLVersion
// Enum to indicate if the main doc is XML 1.1 or XML 1.0 conformant
+ // fUIntPool
+ // pool of unsigned integers to help with duplicate attribute
+ // detection and filling in default/fixed attributes
+ // fUIntPoolRow
+ // current row in fUIntPool
+ // fUIntPoolCol
+ // current column in row
+ // fUIntPoolRowTotal
+ // total number of rows in table
//
// fMemoryManager
// Pluggable memory manager for dynamic allocation/deallocation.
@@ -937,6 +952,10 @@
unsigned int fXMLNamespaceId;
unsigned int fXMLNSNamespaceId;
unsigned int fSchemaNamespaceId;
+ unsigned int ** fUIntPool;
+ unsigned int fUIntPoolRow;
+ unsigned int fUIntPoolCol;
+ unsigned int fUIntPoolRowTotal;
XMLUInt32 fScannerId;
XMLUInt32 fSequenceId;
RefVectorOf<XMLAttr>* fAttrList;
@@ -1448,3 +1467,4 @@
XERCES_CPP_NAMESPACE_END
#endif
+
1.50 +136 -43 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
Index: SGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -r1.49 -r1.50
--- SGXMLScanner.cpp 21 Nov 2003 22:38:19 -0000 1.49
+++ SGXMLScanner.cpp 24 Nov 2003 05:09:38 -0000 1.50
@@ -86,6 +86,7 @@
#include <xercesc/validators/schema/identity/ValueStore.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLResourceIdentifier.hpp>
+#include <xercesc/util/HashPtr.hpp>
XERCES_CPP_NAMESPACE_BEGIN
@@ -110,6 +111,9 @@
, fValueStoreCache(0)
, fFieldActivator(0)
, fElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistryNS(0)
{
try
{
@@ -156,6 +160,9 @@
, fValueStoreCache(0)
, fFieldActivator(0)
, fElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistryNS(0)
{
try
{
@@ -2015,6 +2022,14 @@
fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
+ fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashPtr(), fMemoryManager
+ );
+ fUndeclaredAttrRegistryNS = new (fMemoryManager) RefHash2KeysTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
+ );
}
void SGXMLScanner::cleanUp()
@@ -2028,6 +2043,8 @@
delete fMatcherStack;
delete fValueStoreCache;
delete fElemNonDeclPool;
+ delete fAttDefRegistry;
+ delete fUndeclaredAttrRegistryNS;
}
void SGXMLScanner::resizeElemState() {
@@ -2072,6 +2089,8 @@
? currType->resetDefs()
: elemDecl->resetDefs();
+ fElemCount++;
+
// If there are no expliclitily provided attributes and there are no
// defined attributes for the element, the we don't have anything to do.
// So just return zero in this case.
@@ -2149,6 +2168,7 @@
// If its not a special case namespace attr of some sort, then we
// do normal checking and processing.
XMLAttDef::AttTypes attType;
+ DatatypeValidator *attrValidator = 0;
if (!isNSAttr)
{
// Some checking for attribute wild card first (for schema)
@@ -2233,20 +2253,54 @@
// Find this attribute within the parent element. We pass both
// the uriID/name and the raw QName buffer, since we don't know
// how the derived validator and its elements store attributes.
- bool wasAdded = false;
if (!attDef) {
- attDef = elemDecl->findAttr
- (
- curPair->getKey()
- , uriId
- , suffPtr
- , prefPtr
- , XMLElementDecl::AddIfNotFound
- , wasAdded
- );
+ attDef = ((SchemaElementDecl *)elemDecl)->getAttDef(suffPtr, uriId);
}
- if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType) {
+ // now need to prepare for duplicate detection
+ if(attDef)
+ {
+ unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fAttDefRegistry->put(attDef, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , attDef->getFullName()
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ else
+ {
+ unsigned int *curCountPtr = fUndeclaredAttrRegistryNS->get(suffPtr, uriId);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fUndeclaredAttrRegistryNS->put((void *)suffPtr, uriId, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , namePtr
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) {
//we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef
if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID)
{
@@ -2266,15 +2320,7 @@
}
}
- if (wasAdded)
- {
- // This is to tell the Validator that this attribute was
- // faulted-in, was not an attribute in the attdef originally
- attDef->setCreateReason(XMLAttDef::JustFaultIn);
- }
-
- bool errorCondition = fValidate && !attDefForWildCard &&
- attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided();
+ bool errorCondition = fValidate && !attDefForWildCard && !attDef;
if (errorCondition && !skipThisOne && !laxThisOne)
{
//
@@ -2300,14 +2346,15 @@
, bufMsg.getRawBuffer()
, elemDecl->getFullName()
);
- ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
+ if(attDef)
+ ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
// PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID);
}
}
- else if(errorCondition && laxThisOne) {
+ else if(errorCondition && laxThisOne && attDef) {
((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::NONE);
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::UNKNOWN);
if (getPSVIHandler())
@@ -2319,6 +2366,7 @@
}
+ /**** REVISIT: excise this dead code
// If its already provided, then there are more than one of
// this attribute in this start tag, so emit an error.
if (attDef->getProvided())
@@ -2341,15 +2389,18 @@
{
attDef->setProvided(true);
}
+ *******/
// Now normalize the raw value since we have the attribute type. We
// don't care about the return status here. If it failed, an error
// was issued, which is all we care about.
if (attDefForWildCard) {
- ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
+ if(attDef)
+ ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
normalizeAttValue
(
attDefForWildCard
+ , namePtr
, curPair->getValue()
, normBuf
);
@@ -2377,26 +2428,32 @@
, false
, elemDecl
);
+ attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
}
+ else // no decl; default DOMTypeInfo to anySimpleType
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
+
// Save the type for later use
attType = attDefForWildCard->getType();
- ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
+ if(attDef)
+ ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
- if(tempDV && tempDV->getType() == DatatypeValidator::Union)
- ((SchemaAttDef*)attDef)->setMembertypeValidator(((UnionDatatypeValidator *)tempDV)->getMemberTypeValidator());
+ if(tempDV && tempDV->getType() == DatatypeValidator::Union && attDef)
+ ((SchemaAttDef*)attDef)->setMembertypeValidator(attrValidator);
}
else {
normalizeAttValue
(
attDef
+ , namePtr
, curPair->getValue()
, normBuf
);
// If we found an attdef for this one, then lets validate it.
- if (attDef->getCreateReason() != XMLAttDef::JustFaultIn)
+ if (attDef)
{
if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType))
{
@@ -2421,12 +2478,20 @@
, false
, elemDecl
);
+ attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
}
+ else
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
}
+ else
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
// Save the type for later use
- attType = attDef->getType();
- ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
+ attType = (attDef)?attDef->getType():XMLAttDef::CData;
+ if(attDef)
+ {
+ ((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
+ }
}
}
else
@@ -2435,10 +2500,13 @@
attType = XMLAttDef::CData;
normalizeAttRawValue
(
- curPair->getKey()
+ namePtr
, curPair->getValue()
, normBuf
);
+ if((uriId == fXMLNSNamespaceId)
+ || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
}
@@ -2457,6 +2525,8 @@
, attType
, true
, fMemoryManager
+ , attrValidator
+ , true
);
toFill.addElement(curAttr);
}
@@ -2470,6 +2540,8 @@
, prefPtr
, normBuf.getRawBuffer()
, attType
+ , attrValidator
+ , true
);
curAttr->setSpecified(true);
}
@@ -2496,8 +2568,9 @@
XMLAttDef *curDef = &attDefList.getAttDef(i);
const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
- if (!curDef->getProvided())
- {
+ unsigned int *attCountPtr = fAttDefRegistry->get(curDef);
+ if (!attCountPtr || *attCountPtr < fElemCount)
+ { // did not occur
((SchemaAttDef *)curDef)->setValidationAttempted(PSVIDefs::FULL);
((SchemaAttDef *)curDef)->setValidity(PSVIDefs::VALID);
if (getPSVIHandler())
@@ -2507,7 +2580,7 @@
// PSVIAttribute->setValidity(PSVIItem::VALIDITY_VALID);
}
- //the attributes is not provided
+ //the attribute is not provided
if (fValidate)
{
// If we are validating and its required, then an error
@@ -2546,9 +2619,8 @@
}
// Fault in the value if needed, and bump the att count.
- // We have to
if ((defType == XMLAttDef::Default)
- || (defType == XMLAttDef::Fixed))
+ || (defType == XMLAttDef::Fixed))
{
// Let the validator pass judgement on the attribute value
if (fValidate)
@@ -2582,7 +2654,7 @@
((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef);
}
- else
+ else if (attCountPtr)
{
//attribute is provided
// (schema) report error for PROHIBITED attrs that are present (V_TAGc)
@@ -2617,6 +2689,7 @@
// are legal if escaped only. And some escape chars are not subject to
// normalization rules.
bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef
+ , const XMLCh* const attrName
, const XMLCh* const value
, XMLBuffer& toFill)
{
@@ -2628,15 +2701,18 @@
};
// Get the type and name
- const XMLAttDef::AttTypes type = attDef->getType();
- const XMLCh* const attrName = attDef->getFullName();
+ const XMLAttDef::AttTypes type = (attDef)
+ ?attDef->getType()
+ :XMLAttDef::CData;
// Assume its going to go fine, and empty the target buffer in preperation
bool retVal = true;
toFill.reset();
// Get attribute def - to check to see if it's declared externally or not
- bool isAttExternal = attDef->isExternal();
+ bool isAttExternal = (attDef)
+ ?attDef->isExternal()
+ :false;
// Loop through the chars of the source value and normalize it according
// to the type.
@@ -2676,7 +2752,8 @@
// Can't have a standalone document declaration of "yes" if attribute
// values are subject to normalisation
fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
- ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
+ if(attDef)
+ ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
@@ -2720,7 +2797,8 @@
// Can't have a standalone document declaration of "yes" if attribute
// values are subject to normalisation
fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
- ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
+ if(attDef)
+ ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
@@ -2741,7 +2819,8 @@
srcPtr++;
}
- ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef);
+ if(attDef)
+ ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef);
return retVal;
}
@@ -2957,6 +3036,20 @@
fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
fEntityExpansionCount = 0;
}
+ fElemCount = 0;
+ if(fUIntPoolRowTotal >= 32)
+ { // 8 KB tied up with validating attributes...
+ fAttDefRegistry->removeAll();
+ fUndeclaredAttrRegistryNS->removeAll();
+ recreateUIntPool();
+ }
+ else
+ {
+ // note that this will implicitly reset the values of the hashtables,
+ // though their buckets will still be tied up
+ resetUIntPool();
+ }
+
}
1.11 +18 -1 xml-xerces/c/src/xercesc/internal/SGXMLScanner.hpp
Index: SGXMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.hpp,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- SGXMLScanner.hpp 22 Oct 2003 20:22:30 -0000 1.10
+++ SGXMLScanner.hpp 24 Nov 2003 05:09:38 -0000 1.11
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.11 2003/11/24 05:09:38 neilg
+ * implement new, stateless, method for detecting duplicate attributes
+ *
* Revision 1.10 2003/10/22 20:22:30 knoaman
* Prepare for annotation support.
*
@@ -207,6 +210,7 @@
bool normalizeAttValue
(
const XMLAttDef* const attDef
+ , const XMLCh* const attrName
, const XMLCh* const value
, XMLBuffer& toFill
);
@@ -310,6 +314,16 @@
// its xpath.
// fElemNonDeclPool
// registry for elements without decls in the grammar
+ // fElemCount
+ // count of the number of start tags seen so far (starts at 1).
+ // Used for duplicate attribute detection/processing of required/defaulted attributes
+ // fAttDefRegistry
+ // mapping from XMLAttDef instances to the count of the last
+ // start tag where they were utilized.
+ // fUndeclaredAttrRegistryNS
+ // mapping of namespaceId/localName pairs to the count of the last
+ // start tag in which they occurred.
+ //
//
// -----------------------------------------------------------------------
bool fSeeXsi;
@@ -325,6 +339,9 @@
ValueStoreCache* fValueStoreCache;
FieldActivator* fFieldActivator;
RefHash3KeysIdPool<SchemaElementDecl>* fElemNonDeclPool;
+ unsigned int fElemCount;
+ RefHashTableOf<unsigned int>* fAttDefRegistry;
+ RefHash2KeysTableOf<unsigned int>* fUndeclaredAttrRegistryNS;
};
inline const XMLCh* SGXMLScanner::getName() const
1.33 +26 -2 xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp
Index: IGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -r1.32 -r1.33
--- IGXMLScanner.cpp 21 Nov 2003 22:38:19 -0000 1.32
+++ IGXMLScanner.cpp 24 Nov 2003 05:09:38 -0000 1.33
@@ -64,6 +64,7 @@
// ---------------------------------------------------------------------------
#include <xercesc/internal/IGXMLScanner.hpp>
#include <xercesc/util/RuntimeException.hpp>
+#include <xercesc/util/HashPtr.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
@@ -108,6 +109,10 @@
, fFieldActivator(0)
, fDTDElemNonDeclPool(0)
, fSchemaElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistry(0)
+ , fUndeclaredAttrRegistryNS(0)
{
try
{
@@ -150,6 +155,10 @@
, fFieldActivator(0)
, fDTDElemNonDeclPool(0)
, fSchemaElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistry(0)
+ , fUndeclaredAttrRegistryNS(0)
{
try
{
@@ -554,7 +563,19 @@
fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
// create pools for undeclared elements
fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
- fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
+ fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
+ fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashPtr(), fMemoryManager
+ );
+ fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
+ );
+ fUndeclaredAttrRegistryNS = new (fMemoryManager) RefHash2KeysTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
+ );
}
void IGXMLScanner::cleanUp()
@@ -569,6 +590,9 @@
delete fLocationPairs;
delete fDTDElemNonDeclPool;
delete fSchemaElemNonDeclPool;
+ delete fAttDefRegistry;
+ delete fUndeclaredAttrRegistry;
+ delete fUndeclaredAttrRegistryNS;
}
// ---------------------------------------------------------------------------
1.12 +20 -1 xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp
Index: IGXMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- IGXMLScanner.hpp 22 Oct 2003 20:22:30 -0000 1.11
+++ IGXMLScanner.hpp 24 Nov 2003 05:09:38 -0000 1.12
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.12 2003/11/24 05:09:38 neilg
+ * implement new, stateless, method for detecting duplicate attributes
+ *
* Revision 1.11 2003/10/22 20:22:30 knoaman
* Prepare for annotation support.
*
@@ -213,6 +216,7 @@
bool normalizeAttValue
(
const XMLAttDef* const attDef
+ , const XMLCh* const name
, const XMLCh* const value
, XMLBuffer& toFill
);
@@ -324,6 +328,17 @@
// registry of "faulted-in" DTD element decls
// fSchemaElemNonDeclPool
// registry for elements without decls in the grammar
+ // fElemCount
+ // count of the number of start tags seen so far (starts at 1).
+ // Used for duplicate attribute detection/processing of required/defaulted attributes
+ // fAttDefRegistry
+ // mapping from XMLAttDef instances to the count of the last
+ // start tag where they were utilized.
+ // fUndeclaredAttrRegistry
+ // mapping of attr QNames to the count of the last start tag in which they occurred
+ // fUndeclaredAttrRegistryNS
+ // mapping of namespaceId/localName pairs to the count of the last
+ // start tag in which they occurred.
//
// -----------------------------------------------------------------------
bool fSeeXsi;
@@ -341,6 +356,10 @@
ValueVectorOf<XMLCh*>* fLocationPairs;
NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool;
RefHash3KeysIdPool<SchemaElementDecl>* fSchemaElemNonDeclPool;
+ unsigned int fElemCount;
+ RefHashTableOf<unsigned int>* fAttDefRegistry;
+ RefHashTableOf<unsigned int>* fUndeclaredAttrRegistry;
+ RefHash2KeysTableOf<unsigned int>* fUndeclaredAttrRegistryNS;
};
inline const XMLCh* IGXMLScanner::getName() const
1.42 +157 -53 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
Index: IGXMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -r1.41 -r1.42
--- IGXMLScanner2.cpp 21 Nov 2003 22:38:19 -0000 1.41
+++ IGXMLScanner2.cpp 24 Nov 2003 05:09:38 -0000 1.42
@@ -119,7 +119,8 @@
// that it owns, and to return us a boolean indicating whether it has
// any defs. If schemas are being validated, the complexType
// at the top of the SchemaValidator's stack will
- // know what's best. REVISIT: don't modify grammar at all...
+ // know what's best. REVISIT: don't modify grammar at all; eliminate
+ // this step...
ComplexTypeInfo *currType = 0;
if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
@@ -127,6 +128,9 @@
? currType->resetDefs()
: elemDecl->resetDefs();
+ // another set of attributes; increment element counter
+ fElemCount++;
+
// If there are no expliclitily provided attributes and there are no
// defined attributes for the element, the we don't have anything to do.
// So just return zero in this case.
@@ -204,6 +208,7 @@
// If its not a special case namespace attr of some sort, then we
// do normal checking and processing.
XMLAttDef::AttTypes attType;
+ DatatypeValidator *attrValidator = 0;
if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType)
{
// Some checking for attribute wild card first (for schema)
@@ -295,20 +300,82 @@
// Find this attribute within the parent element. We pass both
// the uriID/name and the raw QName buffer, since we don't know
// how the derived validator and its elements store attributes.
- bool wasAdded = false;
if (!attDef) {
- attDef = elemDecl->findAttr
- (
- curPair->getKey()
- , uriId
- , suffPtr
- , prefPtr
- , XMLElementDecl::AddIfNotFound
- , wasAdded
- );
+ if(fGrammarType == Grammar::SchemaGrammarType)
+ attDef = ((SchemaElementDecl *)elemDecl)->getAttDef( suffPtr , uriId);
+ else
+ attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr);
+ }
+
+ // now need to prepare for duplicate detection
+ if(attDef)
+ {
+ unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fAttDefRegistry->put(attDef, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , attDef->getFullName()
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ else
+ {
+ if(fGrammarType == Grammar::DTDGrammarType)
+ {
+ unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , namePtr
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ else // schema grammar
+ {
+ unsigned int *curCountPtr = fUndeclaredAttrRegistryNS->get(suffPtr, uriId);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fUndeclaredAttrRegistryNS->put((void *)suffPtr, uriId, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , namePtr
+ , elemDecl->getFullName()
+ );
+ }
+ }
}
- if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType) {
+ if(!skipThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) {
//we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef
if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID)
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID);
@@ -326,15 +393,7 @@
}
}
- if (wasAdded)
- {
- // This is to tell the Validator that this attribute was
- // faulted-in, was not an attribute in the attdef originally
- attDef->setCreateReason(XMLAttDef::JustFaultIn);
- }
-
- bool errorCondition = fValidate && !attDefForWildCard &&
- attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided();
+ bool errorCondition = fValidate && !attDefForWildCard && !attDef;
if (errorCondition && !skipThisOne && !laxThisOne)
{
//
@@ -360,7 +419,7 @@
, bufMsg.getRawBuffer()
, elemDecl->getFullName()
);
- if(fGrammarType == Grammar::SchemaGrammarType) {
+ if(fGrammarType == Grammar::SchemaGrammarType && attDef) {
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
@@ -369,7 +428,7 @@
}
}
}
- else if(errorCondition && laxThisOne && fGrammarType == Grammar::SchemaGrammarType) {
+ else if(errorCondition && laxThisOne && fGrammarType == Grammar::SchemaGrammarType && attDef) {
((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::NONE);
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::UNKNOWN);
if (getPSVIHandler())
@@ -381,6 +440,7 @@
}
+ /**** REVISIT: excise this dead code
// If its already provided, then there are more than one of
// this attribute in this start tag, so emit an error.
if (attDef->getProvided())
@@ -404,15 +464,18 @@
{
attDef->setProvided(true);
}
+ ********/
// Now normalize the raw value since we have the attribute type. We
// don't care about the return status here. If it failed, an error
// was issued, which is all we care about.
if (attDefForWildCard) {
- ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
+ if(attDef)
+ ((SchemaAttDef*)attDef)->setAnyDatatypeValidator(((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator());
normalizeAttValue
(
attDefForWildCard
+ , namePtr
, curPair->getValue()
, normBuf
);
@@ -440,28 +503,33 @@
, false
, elemDecl
);
+ attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
}
+ else // no decl; default DOMTypeInfo to anySimpleType
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
// Save the type for later use
attType = attDefForWildCard->getType();
- if(fGrammarType == Grammar::SchemaGrammarType) {
+ if(fGrammarType == Grammar::SchemaGrammarType && attDef)
+ {
((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
- if(tempDV && tempDV->getType() == DatatypeValidator::Union)
- ((SchemaAttDef*)attDef)->setMembertypeValidator(((UnionDatatypeValidator *)tempDV)->getMemberTypeValidator());
+ if(tempDV && tempDV->getType() == DatatypeValidator::Union )
+ ((SchemaAttDef*)attDef)->setMembertypeValidator(attrValidator);
}
}
else {
normalizeAttValue
(
attDef
+ , namePtr
, curPair->getValue()
, normBuf
);
// If we found an attdef for this one, then lets validate it.
- if (attDef->getCreateReason() != XMLAttDef::JustFaultIn)
+ if (attDef)
{
if (fNormalizeData && (fGrammarType == Grammar::SchemaGrammarType))
{
@@ -486,15 +554,25 @@
, false
, elemDecl
);
+ attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
}
+ else if(fGrammarType == Grammar::SchemaGrammarType)
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
+ }
+ else // no attDef at all; default to anySimpleType
+ {
+ if(fGrammarType == Grammar::SchemaGrammarType)
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
}
// Save the type for later use
- attType = attDef->getType();
+ attType = (attDef)?attDef->getType():XMLAttDef::CData;
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType && attDef)
+ {
((SchemaElementDecl *)(elemDecl))->updateValidityFromAttribute((SchemaAttDef *)attDef);
+ }
}
@@ -505,10 +583,13 @@
attType = XMLAttDef::CData;
normalizeAttRawValue
(
- curPair->getKey()
+ namePtr
, curPair->getValue()
, normBuf
);
+ if((uriId == fXMLNSNamespaceId)
+ || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
+ attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
}
// Add this attribute to the attribute list that we use to pass them
@@ -545,6 +626,8 @@
, attType
, true
, fMemoryManager
+ , attrValidator
+ , (fGrammarType == Grammar::SchemaGrammarType )
);
toFill.addElement(curAttr);
}
@@ -558,10 +641,12 @@
, prefPtr
, normBuf.getRawBuffer()
, attType
+ , attrValidator
+ , (fGrammarType == Grammar::SchemaGrammarType )
);
curAttr->setSpecified(true);
}
-
+
// Bump the count of attrs in the list
retCount++;
}
@@ -584,9 +669,9 @@
// Get the current att def, for convenience and its def type
const XMLAttDef *curDef = &attDefList.getAttDef(i);
const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
-
- if (!curDef->getProvided())
- {
+ unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef);
+ if (!attCountPtr || *attCountPtr < fElemCount)
+ { // did not occur
if(fGrammarType == Grammar::SchemaGrammarType) {
((SchemaAttDef *)curDef)->setValidationAttempted(PSVIDefs::FULL);
((SchemaAttDef *)curDef)->setValidity(PSVIDefs::VALID);
@@ -598,7 +683,7 @@
}
}
- //the attributes is not provided
+ //the attribute is not provided
if (fValidate)
{
// If we are validating and its required, then an error
@@ -622,7 +707,7 @@
}
}
else if ((defType == XMLAttDef::Default) ||
- (defType == XMLAttDef::Fixed) )
+ (defType == XMLAttDef::Fixed) )
{
if (fStandalone && curDef->isExternal())
{
@@ -643,9 +728,8 @@
}
// Fault in the value if needed, and bump the att count.
- // We have to
if ((defType == XMLAttDef::Default)
- || (defType == XMLAttDef::Fixed))
+ || (defType == XMLAttDef::Fixed))
{
// Let the validator pass judgement on the attribute value
if (fValidate)
@@ -691,7 +775,7 @@
((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef);
}
- else
+ else if(attCountPtr)
{
//attribute is provided
// (schema) report error for PROHIBITED attrs that are present (V_TAGc)
@@ -728,6 +812,7 @@
// are legal if escaped only. And some escape chars are not subject to
// normalization rules.
bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef
+ , const XMLCh* const attName
, const XMLCh* const value
, XMLBuffer& toFill)
{
@@ -739,14 +824,18 @@
};
// Get the type and name
- const XMLAttDef::AttTypes type = attDef->getType();
+ const XMLAttDef::AttTypes type = (attDef)
+ ?attDef->getType()
+ :XMLAttDef::CData;
// Assume it's going to go fine, and empty the target buffer in preparation
bool retVal = true;
toFill.reset();
// Get attribute def - to check to see if it's declared externally or not
- bool isAttExternal = attDef->isExternal();
+ bool isAttExternal = (attDef)
+ ?attDef->isExternal()
+ :false;
// Loop through the chars of the source value and normalize it according
// to the type.
@@ -769,7 +858,7 @@
// not allowed in attribute values.
if (!escaped && (*srcPtr == chOpenAngle))
{
- emitError(XMLErrs::BracketInAttrValue, attDef->getFullName());
+ emitError(XMLErrs::BracketInAttrValue, attName);
retVal = false;
}
@@ -783,17 +872,18 @@
// XML 1.0, Section 2.9
if (fStandalone && fValidate && isAttExternal)
{
- // Can't have a standalone document declaration of "yes" if attribute
- // values are subject to normalisation
- fValidator->emitError(XMLValid::NoAttNormForStandalone, attDef->getFullName());
- if(fGrammarType == Grammar::SchemaGrammarType) {
- ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
+ // Can't have a standalone document declaration of "yes" if attribute
+ // values are subject to normalisation
+ fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
+ if(fGrammarType == Grammar::SchemaGrammarType && attDef) {
+ ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
// REVISIT:
// PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID);
}
- }
+ }
+
}
nextCh = chSpace;
}
@@ -831,8 +921,8 @@
{
// Can't have a standalone document declaration of "yes" if attribute
// values are subject to normalisation
- fValidator->emitError(XMLValid::NoAttNormForStandalone, attDef->getFullName());
- if(fGrammarType == Grammar::SchemaGrammarType) {
+ fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
+ if(fGrammarType == Grammar::SchemaGrammarType && attDef) {
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
if (getPSVIHandler())
{
@@ -855,7 +945,7 @@
srcPtr++;
}
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType && attDef)
((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromAttribute((SchemaAttDef *)attDef);
return retVal;
@@ -1093,6 +1183,20 @@
{
fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
fEntityExpansionCount = 0;
+ }
+ fElemCount = 0;
+ if(fUIntPoolRowTotal >= 32)
+ { // 8 KB tied up with validating attributes...
+ fAttDefRegistry->removeAll();
+ fUndeclaredAttrRegistry->removeAll();
+ fUndeclaredAttrRegistryNS->removeAll();
+ recreateUIntPool();
+ }
+ else
+ {
+ // note that this will implicitly reset the values of the hashtables,
+ // though their buckets will still be tied up
+ resetUIntPool();
}
}
1.31 +94 -57 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
Index: DGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -r1.30 -r1.31
--- DGXMLScanner.cpp 21 Nov 2003 14:46:56 -0000 1.30
+++ DGXMLScanner.cpp 24 Nov 2003 05:09:38 -0000 1.31
@@ -80,6 +80,7 @@
#include <xercesc/validators/DTD/DTDValidator.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLResourceIdentifier.hpp>
+#include <xercesc/util/HashPtr.hpp>
XERCES_CPP_NAMESPACE_BEGIN
@@ -95,6 +96,9 @@
, fDTDValidator(0)
, fDTDGrammar(0)
, fDTDElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistry(0)
{
try
{
@@ -134,6 +138,9 @@
, fDTDValidator(0)
, fDTDGrammar(0)
, fDTDElemNonDeclPool(0)
+ , fElemCount(0)
+ , fAttDefRegistry(0)
+ , fUndeclaredAttrRegistry(0)
{
try
{
@@ -1169,6 +1176,9 @@
// pairs until we get there.
unsigned int attCount = 0;
unsigned int curAttListSize = fAttrList->size();
+ wasAdded = false;
+ fElemCount++;
+
while (true)
{
// And get the next non-space character
@@ -1253,37 +1263,57 @@
// See if this attribute is declared for this element. If we are
// not validating of course it will not be at first, but we will
// fault it into the pool (to avoid lots of redundant errors.)
- wasAdded = false;
- XMLAttDef* attDef = elemDecl->findAttr
- (
- fAttNameBuf.getRawBuffer()
- , 0
- , 0
- , 0
- , XMLElementDecl::AddIfNotFound
- , wasAdded
- );
+ XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer());
- if (fValidate)
+ // now need to prepare for duplicate detection
+ if(attDef)
{
- if (wasAdded)
+ unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
+ if(!curCountPtr)
{
- // This is to tell the Validator that this attribute was
- // faulted-in, was not an attribute in the attdef originally
- attDef->setCreateReason(XMLAttDef::JustFaultIn);
-
- fValidator->emitError
- (
- XMLValid::AttNotDefinedForElement
- , fAttNameBuf.getRawBuffer()
- , qnameRawBuf
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fAttDefRegistry->put(attDef, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , attDef->getFullName()
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ else
+ {
+ XMLCh * namePtr = fAttNameBuf.getRawBuffer();
+ unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
+ if(!curCountPtr)
+ {
+ curCountPtr = getNewUIntPtr();
+ *curCountPtr = fElemCount;
+ fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
+ }
+ else if(*curCountPtr < fElemCount)
+ *curCountPtr = fElemCount;
+ else
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , namePtr
+ , elemDecl->getFullName()
);
}
- // If this attribute was faulted-in and first occurence,
- // then emit an error
- else if (attDef->getCreateReason() == XMLAttDef::JustFaultIn
- && !attDef->getProvided())
+ }
+ if (fValidate)
+ {
+ if (!attDef)
{
+
fValidator->emitError
(
XMLValid::AttNotDefinedForElement
@@ -1293,28 +1323,11 @@
}
}
- // If its already provided, then there are more than one of
- // this attribute in this start tag, so emit an error.
- if (attDef->getProvided())
- {
- emitError
- (
- XMLErrs::AttrAlreadyUsedInSTag
- , attDef->getFullName()
- , qnameRawBuf
- );
- }
- else
- {
- // Mark this one as already seen
- attDef->setProvided(true);
- }
-
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
- if (!scanAttValue(attDef, fAttValueBuf))
+ if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf))
{
static const XMLCh tmpList[] =
{
@@ -1352,7 +1365,7 @@
// determine if it has a valid value. It will output any needed
// errors, but we just keep going. We only need to do this if
// we are validating.
- if (!wasAdded && attDef->getCreateReason() != XMLAttDef::JustFaultIn)
+ if (attDef)
{
// Let the validator pass judgement on the attribute value
if (fValidate)
@@ -1403,7 +1416,7 @@
fEmptyNamespaceId
, fAttNameBuf.getRawBuffer()
, fAttValueBuf.getRawBuffer()
- , attDef->getType()
+ , (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
@@ -1416,7 +1429,7 @@
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, fAttValueBuf.getRawBuffer()
- , attDef->getType()
+ , (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
@@ -1434,7 +1447,7 @@
fEmptyNamespaceId
, fAttNameBuf.getRawBuffer()
, fAttValueBuf.getRawBuffer()
- , attDef->getType()
+ , (attDef)?attDef->getType():XMLAttDef::CData
);
}
else
@@ -1445,7 +1458,7 @@
, fAttNameBuf.getRawBuffer()
, XMLUni::fgZeroLenString
, fAttValueBuf.getRawBuffer()
- , attDef->getType()
+ , (attDef)?attDef->getType():XMLAttDef::CData
);
}
curAtt->setSpecified(true);
@@ -1870,6 +1883,14 @@
fDTDValidator = new (fMemoryManager) DTDValidator();
initValidator(fDTDValidator);
fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
+ fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashPtr(), fMemoryManager
+ );
+ fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
+ (
+ 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
+ );
}
void DGXMLScanner::cleanUp()
@@ -1877,6 +1898,8 @@
delete fAttrNSList;
delete fDTDValidator;
delete fDTDElemNonDeclPool;
+ delete fAttDefRegistry;
+ delete fUndeclaredAttrRegistry;
}
@@ -1921,8 +1944,9 @@
// Get the current att def, for convenience and its def type
XMLAttDef& curDef = attDefList.getAttDef(i);
- if (!curDef.getProvided() && curDef.getCreateReason() != XMLAttDef::JustFaultIn)
- {
+ unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
+ if (!attCountPtr || *attCountPtr < fElemCount)
+ { // did not occur
const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
if (fValidate)
@@ -2039,9 +2063,6 @@
retCount++;
}
}
- else {
- curDef.setProvided(false);
- }
}
}
@@ -2182,6 +2203,18 @@
fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
fEntityExpansionCount = 0;
}
+ if(fUIntPoolRowTotal >= 32)
+ { // 8 KB tied up with validating attributes...
+ fAttDefRegistry->removeAll();
+ fUndeclaredAttrRegistry->removeAll();
+ recreateUIntPool();
+ }
+ else
+ {
+ // note that this will implicitly reset the values of the hashtables,
+ // though their buckets will still be tied up
+ resetUIntPool();
+ }
}
@@ -2440,6 +2473,7 @@
// DGXMLScanner: Private parsing methods
// ---------------------------------------------------------------------------
bool DGXMLScanner::scanAttValue( const XMLAttDef* const attDef
+ , const XMLCh *const attrName
, XMLBuffer& toFill)
{
enum States
@@ -2449,8 +2483,9 @@
};
// Get the type and name
- const XMLAttDef::AttTypes type = attDef->getType();
- const XMLCh* const attrName = attDef->getFullName();
+ const XMLAttDef::AttTypes type = (attDef)
+ ?attDef->getType()
+ :XMLAttDef::CData;
// Reset the target buffer
toFill.reset();
@@ -2465,7 +2500,9 @@
const unsigned int curReader = fReaderMgr.getCurrentReaderNum();
// Get attribute def - to check to see if it's declared externally or not
- bool isAttExternal = attDef->isExternal();
+ bool isAttExternal = (attDef)
+ ?attDef->isExternal()
+ :false;
// Loop until we get the attribute value. Note that we use a double
// loop here to avoid the setup/teardown overhead of the exception
1.11 +16 -1 xml-xerces/c/src/xercesc/internal/DGXMLScanner.hpp
Index: DGXMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.hpp,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- DGXMLScanner.hpp 22 Oct 2003 20:22:30 -0000 1.10
+++ DGXMLScanner.hpp 24 Nov 2003 05:09:39 -0000 1.11
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.11 2003/11/24 05:09:39 neilg
+ * implement new, stateless, method for detecting duplicate attributes
+ *
* Revision 1.10 2003/10/22 20:22:30 knoaman
* Prepare for annotation support.
*
@@ -210,6 +213,7 @@
bool scanAttValue
(
const XMLAttDef* const attDef
+ , const XMLCh *const attrName
, XMLBuffer& toFill
);
bool scanContent(const bool extEntity);
@@ -240,12 +244,23 @@
//
// fDTDElemNonDeclPool
// registry of "faulted-in" DTD element decls
+ // fElemCount
+ // count of the number of start tags seen so far (0 until the first start tag is scanned, which is numbered 1).
+ // Used for duplicate attribute detection/processing of required/defaulted attributes
+ // fAttDefRegistry
+ // mapping from XMLAttDef instances to the count of the last
+ // start tag where they were utilized.
+ // fUndeclaredAttrRegistry
+ // mapping of attr QNames to the count of the last start tag in which they occurred
//
// -----------------------------------------------------------------------
ValueVectorOf<XMLAttr*>* fAttrNSList;
DTDValidator* fDTDValidator;
DTDGrammar* fDTDGrammar;
NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool;
+ unsigned int fElemCount;
+ RefHashTableOf<unsigned int>* fAttDefRegistry;
+ RefHashTableOf<unsigned int>* fUndeclaredAttrRegistry;
};
inline const XMLCh* DGXMLScanner::getName() const
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org