You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by pe...@apache.org on 2004/09/28 23:27:38 UTC
cvs commit: xml-xerces/c/src/xercesc/internal XMLScanner.hpp XMLScanner.cpp WFXMLScanner.cpp IGXMLScanner2.cpp DGXMLScanner.cpp
peiyongz 2004/09/28 14:27:38
Modified: c/src/xercesc/internal XMLScanner.hpp XMLScanner.cpp
WFXMLScanner.cpp IGXMLScanner2.cpp DGXMLScanner.cpp
Log:
Optimized duplicated attributes checking for large number of attributes
Revision Changes Path
1.40 +34 -0 xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
Index: XMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
retrieving revision 1.39
retrieving revision 1.40
diff -u -r1.39 -r1.40
--- XMLScanner.hpp 28 Sep 2004 02:14:13 -0000 1.39
+++ XMLScanner.hpp 28 Sep 2004 21:27:38 -0000 1.40
@@ -16,6 +16,9 @@
/*
* $Log$
+ * Revision 1.40 2004/09/28 21:27:38 peiyongz
+ * Optimized duplicated attributes checking for large number of attributes
+ *
* Revision 1.39 2004/09/28 02:14:13 cargilld
* Add support for validating annotations.
*
@@ -731,6 +734,13 @@
void resetUIntPool();
void recreateUIntPool();
+ inline
+ void setAttrDupChkRegistry
+ (
+ const unsigned int &attrNumber
+ , bool &toUseHashTable
+ );
+
// -----------------------------------------------------------------------
// Data members
//
@@ -986,6 +996,7 @@
XMLUInt32 fScannerId;
XMLUInt32 fSequenceId;
RefVectorOf<XMLAttr>* fAttrList;
+ RefHash2KeysTableOf<XMLAttr>* fAttrDupChkRegistry;
XMLDocumentHandler* fDocHandler;
DocTypeHandler* fDocTypeHandler;
XMLEntityHandler* fEntityHandler;
@@ -1519,6 +1530,29 @@
fValidationContext->clearIdRefList();
fValidationContext->setEntityDeclPool(0);
fEntityDeclPoolRetrieved = false;
+}
+
+inline void XMLScanner::setAttrDupChkRegistry(const unsigned int &attrNumber
+ , bool &toUseHashTable)
+{
+ // Once attrNumber exceeds 20, flag the caller to use the hash table for duplicate checks; it is created on first use and emptied on reuse
+ if (attrNumber > 20)
+ {
+ toUseHashTable = true;
+
+ if (!fAttrDupChkRegistry)
+ {
+ fAttrDupChkRegistry = new (fMemoryManager) RefHash2KeysTableOf<XMLAttr>
+ (
+ 2*attrNumber+1, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
+ );
+ }
+ else
+ {
+ fAttrDupChkRegistry->removeAll();
+ }
+ }
+
}
XERCES_CPP_NAMESPACE_END
1.71 +4 -1 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
Index: XMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
retrieving revision 1.70
retrieving revision 1.71
diff -u -r1.70 -r1.71
--- XMLScanner.cpp 28 Sep 2004 02:14:13 -0000 1.70
+++ XMLScanner.cpp 28 Sep 2004 21:27:38 -0000 1.71
@@ -157,6 +157,7 @@
, fScannerId(0)
, fSequenceId(0)
, fAttrList(0)
+ , fAttrDupChkRegistry(0)
, fDocHandler(0)
, fDocTypeHandler(0)
, fEntityHandler(0)
@@ -237,6 +238,7 @@
, fScannerId(0)
, fSequenceId(0)
, fAttrList(0)
+ , fAttrDupChkRegistry(0)
, fDocHandler(docHandler)
, fDocTypeHandler(docTypeHandler)
, fEntityHandler(entityHandler)
@@ -279,6 +281,7 @@
XMLScanner::~XMLScanner()
{
delete fAttrList;
+ delete fAttrDupChkRegistry;
delete fValidationContext;
fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation;
1.25 +33 -8 xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp
Index: WFXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- WFXMLScanner.cpp 8 Sep 2004 13:56:13 -0000 1.24
+++ WFXMLScanner.cpp 28 Sep 2004 21:27:38 -0000 1.25
@@ -1457,6 +1457,13 @@
}
if(attCount) {
+
+ //
+ // Decide whether to use a hash table for duplicate checking
+ //
+ bool toUseHashTable = false;
+ setAttrDupChkRegistry(attCount, toUseHashTable);
+
// check for duplicate namespace attributes:
// by checking for qualified names with the same local part and with prefixes
// which have been bound to namespace names that are identical.
@@ -1464,17 +1471,35 @@
XMLAttr* curAtt;
for (unsigned int attrIndex=0; attrIndex < attCount-1; attrIndex++) {
loopAttr = fAttrList->elementAt(attrIndex);
- for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
- curAtt = fAttrList->elementAt(curAttrIndex);
- if (curAtt->getURIId() == loopAttr->getURIId() &&
- XMLString::equals(curAtt->getName(), loopAttr->getName())) {
- emitError
- (
- XMLErrs::AttrAlreadyUsedInSTag
+
+ if (!toUseHashTable)
+ {
+ for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
+ curAtt = fAttrList->elementAt(curAttrIndex);
+ if (curAtt->getURIId() == loopAttr->getURIId() &&
+ XMLString::equals(curAtt->getName(), loopAttr->getName())) {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
, curAtt->getName()
, elemDecl->getFullName()
+ );
+ }
+ }
+ }
+ else
+ {
+ if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId()))
+ {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , loopAttr->getName()
+ , elemDecl->getFullName()
);
}
+
+ fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr);
}
}
}
1.72 +38 -7 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
Index: IGXMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
retrieving revision 1.71
retrieving revision 1.72
diff -u -r1.71 -r1.72
--- IGXMLScanner2.cpp 26 Sep 2004 18:23:50 -0000 1.71
+++ IGXMLScanner2.cpp 28 Sep 2004 21:27:38 -0000 1.72
@@ -110,6 +110,15 @@
XMLBufBid bbNormal(&fBufMgr);
XMLBuffer& normBuf = bbNormal.getBuffer();
+ //
+ // Decide whether to use a hash table for duplicate checking
+ //
+ bool toUseHashTable = false;
+ if (fGrammarType == Grammar::DTDGrammarType)
+ {
+ setAttrDupChkRegistry(attCount, toUseHashTable);
+ }
+
// Loop through our explicitly provided attributes, which are in the raw
// scanned form, and build up XMLAttr objects.
unsigned int index;
@@ -617,16 +626,32 @@
// by checking for qualified names with the same local part and with prefixes
// which have been bound to namespace names that are identical.
if (fGrammarType == Grammar::DTDGrammarType) {
- for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) {
- curAttr = toFill.elementAt(attrIndex);
- if (uriId == curAttr->getURIId() &&
- XMLString::equals(suffPtr, curAttr->getName())) {
+ if (!toUseHashTable)
+ {
+ for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) {
+ curAttr = toFill.elementAt(attrIndex);
+ if (uriId == curAttr->getURIId() &&
+ XMLString::equals(suffPtr, curAttr->getName())) {
+ emitError
+ (
+
+ XMLErrs::AttrAlreadyUsedInSTag
+ , curAttr->getName()
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ }
+ else
+ {
+ if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId))
+ {
emitError
- (
+ (
XMLErrs::AttrAlreadyUsedInSTag
, curAttr->getName()
, elemDecl->getFullName()
- );
+ );
}
}
}
@@ -658,6 +683,12 @@
);
curAttr->setSpecified(true);
}
+
+ if (toUseHashTable)
+ {
+ fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr);
+ }
+
if(psviAttr)
psviAttr->setValue(curAttr->getValue());
1.54 +32 -7 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
Index: DGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
retrieving revision 1.53
retrieving revision 1.54
diff -u -r1.53 -r1.54
--- DGXMLScanner.cpp 20 Sep 2004 15:00:49 -0000 1.53
+++ DGXMLScanner.cpp 28 Sep 2004 21:27:38 -0000 1.54
@@ -2378,6 +2378,13 @@
void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList, int attCount,
XMLElementDecl* elemDecl)
{
+
+ //
+ // Decide whether to use a hash table for duplicate checking
+ //
+ bool toUseHashTable = false;
+ setAttrDupChkRegistry((unsigned int&)attCount, toUseHashTable);
+
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
// When we find one, send it off to be used to update the element stack's
@@ -2412,17 +2419,35 @@
// by checking for qualified names with the same local part and with prefixes
// which have been bound to namespace names that are identical.
XMLAttr* loopAttr;
- for (int attrIndex=0; attrIndex < index; attrIndex++) {
- loopAttr = theAttrList->elementAt(attrIndex);
- if (loopAttr->getURIId() == curAttr->getURIId() &&
- XMLString::equals(loopAttr->getName(), curAttr->getName())) {
+
+ if (!toUseHashTable)
+ {
+ for (int attrIndex=0; attrIndex < index; attrIndex++) {
+ loopAttr = theAttrList->elementAt(attrIndex);
+ if (loopAttr->getURIId() == curAttr->getURIId() &&
+ XMLString::equals(loopAttr->getName(), curAttr->getName())) {
+ emitError
+ (
+ XMLErrs::AttrAlreadyUsedInSTag
+ , curAttr->getName()
+ , elemDecl->getFullName()
+ );
+ }
+ }
+ }
+ else
+ {
+ if (fAttrDupChkRegistry->containsKey((void*)curAttr->getName(), curAttr->getURIId()))
+ {
emitError
- (
+ (
XMLErrs::AttrAlreadyUsedInSTag
, curAttr->getName()
, elemDecl->getFullName()
- );
+ );
}
+
+ fAttrDupChkRegistry->put((void*)curAttr->getName(), curAttr->getURIId(), curAttr);
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org