You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2004/06/02 21:58:10 UTC
cvs commit: xml-xerces/c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner.cpp SGXMLScanner.cpp ElemStack.cpp ElemStack.hpp
neilg 2004/06/02 12:58:10
Modified: c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner.cpp
SGXMLScanner.cpp ElemStack.cpp ElemStack.hpp
Log:
Fix bug where scanners would accept malformed tags of the form
<p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was
enabled. This also opened the way for some end-tag scanning
performance improvements.
Revision Changes Path
1.48 +10 -24 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
Index: DGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
retrieving revision 1.47
retrieving revision 1.48
diff -u -r1.47 -r1.48
--- DGXMLScanner.cpp 27 May 2004 16:33:07 -0000 1.47
+++ DGXMLScanner.cpp 2 Jun 2004 19:58:09 -0000 1.48
@@ -645,44 +645,30 @@
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
}
- // After the </ is the element QName, so get a name from the input
- if (!fReaderMgr.getName(fQNameBuf))
- {
- // It failed so we can't really do anything with it
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
-
- // Resolve element name uri if needed
- unsigned int uriId = fEmptyNamespaceId;
- const ElemStack::StackElem* topElem = fElemStack.topElement();
- if (fDoNamespaces)
- {
- uriId = resolvePrefix
- (
- topElem->fThisElement->getElementName()->getPrefix()
- , ElemStack::Mode_Element
- );
- }
+ // Pop the stack of the element we are supposed to be ending. Remember
+ // that we don't own this. The stack just keeps them and reuses them.
+ unsigned int uriId = (fDoNamespaces)
+ ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
// Pop the stack of the element we are supposed to be ending. Remember
// that we don't own this. The stack just keeps them and reuses them.
- fElemStack.popTop();
+ const ElemStack::StackElem* topElem = fElemStack.popTop();
+ XMLElementDecl *tempElement = topElem->fThisElement;
// See if it was the root element, to avoid multiple calls below
const bool isRoot = fElemStack.isEmpty();
// Make sure that its the end of the element that we expect
- if (!XMLString::equals(topElem->fThisElement->getFullName(), fQNameBuf.getRawBuffer()))
+ if (!fReaderMgr.skippedString(tempElement->getFullName()))
{
emitError
(
XMLErrs::ExpectedEndOfTagX
- , topElem->fThisElement->getFullName()
+ , tempElement->getFullName()
);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
}
-
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
1.70 +46 -59 xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp
Index: IGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v
retrieving revision 1.69
retrieving revision 1.70
diff -u -r1.69 -r1.70
--- IGXMLScanner.cpp 1 Jun 2004 16:48:13 -0000 1.69
+++ IGXMLScanner.cpp 2 Jun 2004 19:58:09 -0000 1.70
@@ -976,68 +976,45 @@
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
}
- // After the </ is the element QName, so get a name from the input
- if (!fReaderMgr.getName(fQNameBuf))
+ // Pop the stack of the element we are supposed to be ending. Remember
+ // that we don't own this. The stack just keeps them and reuses them.
+ unsigned int uriId = (fDoNamespaces)
+ ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
+
+ // these get initialized below
+ const ElemStack::StackElem* topElem = 0;
+ XMLElementDecl *tempElement = 0;
+ XMLCh *elemName = 0;
+
+ // Make sure that its the end of the element that we expect
+ // special case for schema validation, whose element decls,
+ // obviously don't contain prefix information
+ if(fGrammarType == Grammar::SchemaGrammarType)
+ {
+ elemName = fElemStack.getCurrentSchemaElemName();
+ topElem = fElemStack.popTop();
+ tempElement = topElem->fThisElement;
+ }
+ else
{
- // It failed so we can't really do anything with it
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipPastChar(chCloseAngle);
- //REVISIT: Do we restore PSVI information?
- return;
+ topElem = fElemStack.popTop();
+ tempElement = topElem->fThisElement;
+ elemName = (XMLCh *)tempElement->getFullName();
}
-
- unsigned int uriId = fEmptyNamespaceId;
- int prefixColonPos = -1;
- if (fDoNamespaces)
+ if (!fReaderMgr.skippedString(elemName))
{
- uriId = resolveQName
+ emitError
(
- fQNameBuf.getRawBuffer()
- , fPrefixBuf
- , ElemStack::Mode_Element
- , prefixColonPos
+ XMLErrs::ExpectedEndOfTagX
+ , elemName
);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
}
- // Pop the stack of the element we are supposed to be ending. Remember
- // that we don't own this. The stack just keeps them and reuses them.
- //
- // NOTE: We CANNOT do this until we've resolved the element name because
- // the element stack top contains the prefix to URI mappings for this
- // element.
- unsigned int topUri = fElemStack.getCurrentURI();
- const ElemStack::StackElem* topElem = fElemStack.popTop();
-
// See if it was the root element, to avoid multiple calls below
const bool isRoot = fElemStack.isEmpty();
- // Make sure that its the end of the element that we expect
- XMLElementDecl* tempElement = topElem->fThisElement;
- if (fDoNamespaces && fGrammarType == Grammar::SchemaGrammarType) {
-
- // reset error occurred
- fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
- const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
- if ((topUri != uriId) || (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
- {
- emitError
- (
- XMLErrs::ExpectedEndOfTagX
- , topElem->fThisElement->getFullName()
- );
- }
- }
- else {
- if (!XMLString::equals(tempElement->getFullName(), fQNameBuf.getRawBuffer()))
- {
- emitError
- (
- XMLErrs::ExpectedEndOfTagX
- , topElem->fThisElement->getFullName()
- );
- }
- }
-
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialTagMarkupError);
@@ -1057,6 +1034,8 @@
if (fGrammarType == Grammar::SchemaGrammarType)
{
+ // reset error occurred
+ fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
if (fValidate && topElem->fThisElement->isDeclared())
{
fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
@@ -2372,7 +2351,6 @@
const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
- unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
// REVISIT: since all this code only really
// makes sense for schemas, why can DTD validation theoretically pass
@@ -2380,12 +2358,11 @@
if (uriId != fEmptyNamespaceId) {
// Check in current grammar before switching if necessary
- const XMLCh *rawQName = fQNameBuf.getRawBuffer();
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
- , rawQName
+ , qnameRawBuf
, currentScope
);
// may have not been declared; must look everywhere:
@@ -2393,14 +2370,14 @@
if(fGrammarType == Grammar::DTDGrammarType)
{
// should never occur in practice
- elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
+ elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
}
else if (fGrammarType == Grammar::SchemaGrammarType)
{
elemDecl = fSchemaElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
}
- if (!elemDecl && (orgGrammarUri != uriId)) {
+ if (!elemDecl && ( fURIStringPool->getId(original_uriStr) != uriId)) {
// not found, switch to the specified grammar
const XMLCh* uriStr = getURIText(uriId);
bool errorCondition = !switchGrammar(uriStr) && fValidate;
@@ -2524,7 +2501,12 @@
// before we made grammars stateless:
elemDecl = fSchemaElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
}
- if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
+ // this is initialized correctly only if there is
+ // no element decl. The other uses in this scope will only
+ // be encountered if there continues to be no element decl--which
+ // implies that this will have been initialized correctly.
+ unsigned orgGrammarUri = fEmptyNamespaceId;
+ if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
//not found, switch grammar and try globalNS
bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
if (errorCondition && !laxThisOne)
@@ -2753,6 +2735,10 @@
if (fGrammarType == Grammar::SchemaGrammarType) {
+ // squirrel away the element's QName, so that we can do an efficient
+ // end-tag match
+ fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
+
ComplexTypeInfo* typeinfo = (fValidate)
? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
: ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
@@ -3527,3 +3513,4 @@
}
XERCES_CPP_NAMESPACE_END
+
1.83 +27 -41 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
Index: SGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
retrieving revision 1.82
retrieving revision 1.83
diff -u -r1.82 -r1.83
--- SGXMLScanner.cpp 27 May 2004 16:33:07 -0000 1.82
+++ SGXMLScanner.cpp 2 Jun 2004 19:58:09 -0000 1.83
@@ -904,53 +904,31 @@
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
}
- // After the </ is the element QName, so get a name from the input
- if (!fReaderMgr.getName(fQNameBuf))
- {
- // It failed so we can't really do anything with it
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
-
- int prefixColonPos = -1;
- unsigned int uriId = resolveQName
- (
- fQNameBuf.getRawBuffer()
- , fPrefixBuf
- , ElemStack::Mode_Element
- , prefixColonPos
- );
-
// Pop the stack of the element we are supposed to be ending. Remember
// that we don't own this. The stack just keeps them and reuses them.
- //
- // NOTE: We CANNOT do this until we've resolved the element name because
- // the element stack top contains the prefix to URI mappings for this
- // element.
- unsigned int topUri = fElemStack.getCurrentURI();
- const ElemStack::StackElem* topElem = fElemStack.popTop();
-
- // See if it was the root element, to avoid multiple calls below
- const bool isRoot = fElemStack.isEmpty();
+ unsigned int uriId = (fDoNamespaces)
+ ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
// Make sure that its the end of the element that we expect
- XMLElementDecl* tempElement = topElem->fThisElement;
- const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
-
- // reset error occurred
- fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
-
- if ((topUri != uriId) ||
- (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
+ XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
+ const ElemStack::StackElem* topElem = fElemStack.popTop();
+ XMLElementDecl *tempElement = topElem->fThisElement;
+ if (!fReaderMgr.skippedString(elemName))
{
emitError
(
XMLErrs::ExpectedEndOfTagX
- , topElem->fThisElement->getFullName()
+ , elemName
);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
}
+ // See if it was the root element, to avoid multiple calls below
+ const bool isRoot = fElemStack.isEmpty();
+
+ fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
+
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialTagMarkupError);
@@ -1202,10 +1180,11 @@
// First we have to do the rawest attribute scan. We don't do any
// normalization of them at all, since we don't know yet what type they
// might be (since we need the element decl in order to do that.)
+ const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
bool isEmpty;
unsigned int attCount = rawAttrScan
(
- fQNameBuf.getRawBuffer()
+ qnameRawBuf
, *fRawAttrList
, isEmpty
);
@@ -1275,7 +1254,6 @@
// the element decl for this element. We have now update the prefix to
// namespace map so we should get the correct element now.
int prefixColonPos = -1;
- const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
unsigned int uriId = resolveQName
(
qnameRawBuf
@@ -1302,7 +1280,6 @@
bool laxBeforeElementFound = false;
const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
- unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
if (uriId != fEmptyNamespaceId) {
@@ -1320,7 +1297,7 @@
// before we made grammars stateless:
elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
}
- if (!elemDecl && (orgGrammarUri != uriId)) {
+ if (!elemDecl && ( fURIStringPool->getId(original_uriStr) != uriId)) {
// not found, switch to the specified grammar
const XMLCh* uriStr = getURIText(uriId);
bool errorCondition = !switchGrammar(uriStr) && fValidate;
@@ -1422,7 +1399,12 @@
// before we made grammars stateless:
elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
}
- if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
+ // this is initialized correctly only if there is
+ // no element decl. The other uses in this scope will only
+ // be encountered if there continues to be no element decl--which
+ // implies that this will have been initialized correctly.
+ unsigned orgGrammarUri = fEmptyNamespaceId;
+ if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
//not found, switch grammar and try globalNS
bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
if (errorCondition && !laxThisOne)
@@ -1625,6 +1607,10 @@
if (((SchemaValidator*) fValidator)->getErrorOccurred())
fPSVIElemContext.fErrorOccurred = true;
}
+
+ // squirrel away the element's QName, so that we can do an efficient
+ // end-tag match
+ fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
ComplexTypeInfo* typeinfo = (fValidate)
? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
1.13 +10 -1 xml-xerces/c/src/xercesc/internal/ElemStack.cpp
Index: ElemStack.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ElemStack.cpp,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- ElemStack.cpp 27 Apr 2004 19:17:52 -0000 1.12
+++ ElemStack.cpp 2 Jun 2004 19:58:10 -0000 1.13
@@ -56,6 +56,12 @@
/*
* $Log$
+ * Revision 1.13 2004/06/02 19:58:10 neilg
+ * Fix bug where scanners would accept malformed tags of the form
+ * <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was
+ * enabled. This also opened the way for some end-tag scanning
+ * performance improvements.
+ *
* Revision 1.12 2004/04/27 19:17:52 peiyongz
* XML1.0-3rd VC: element content(children) dont allow white space from
* EntityRef/CharRef
@@ -209,6 +215,7 @@
fMemoryManager->deallocate(fStack[stackInd]->fChildren);//delete [] fStack[stackInd]->fChildren;
fMemoryManager->deallocate(fStack[stackInd]->fMap);//delete [] fStack[stackInd]->fMap;
+ fMemoryManager->deallocate(fStack[stackInd]->fSchemaElemName);
delete fStack[stackInd];
}
@@ -235,6 +242,8 @@
fStack[fStackTop]->fChildren = 0;
fStack[fStackTop]->fMapCapacity = 0;
fStack[fStackTop]->fMap = 0;
+ fStack[fStackTop]->fSchemaElemName = 0;
+ fStack[fStackTop]->fSchemaElemNameMaxLen = 0;
}
// Set up the new top row
@@ -271,6 +280,8 @@
fStack[fStackTop]->fChildren = 0;
fStack[fStackTop]->fMapCapacity = 0;
fStack[fStackTop]->fMap = 0;
+ fStack[fStackTop]->fSchemaElemName = 0;
+ fStack[fStackTop]->fSchemaElemNameMaxLen = 0;
}
// Set up the new top row
1.10 +30 -1 xml-xerces/c/src/xercesc/internal/ElemStack.hpp
Index: ElemStack.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ElemStack.hpp,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- ElemStack.hpp 27 Apr 2004 19:17:52 -0000 1.9
+++ ElemStack.hpp 2 Jun 2004 19:58:10 -0000 1.10
@@ -56,6 +56,12 @@
/*
* $Log$
+ * Revision 1.10 2004/06/02 19:58:10 neilg
+ * Fix bug where scanners would accept malformed tags of the form
+ * <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was
+ * enabled. This also opened the way for some end-tag scanning
+ * performance improvements.
+ *
* Revision 1.9 2004/04/27 19:17:52 peiyongz
* XML1.0-3rd VC: element content(children) dont allow white space from
* EntityRef/CharRef
@@ -221,6 +227,8 @@
int fCurrentScope;
Grammar* fCurrentGrammar;
unsigned int fCurrentURI;
+ XMLCh * fSchemaElemName;
+ unsigned int fSchemaElemNameMaxLen;
};
enum MapModes
@@ -270,6 +278,9 @@
void setCurrentURI(unsigned int uri);
unsigned int getCurrentURI();
+ inline void setCurrentSchemaElemName(const XMLCh * const schemaElemName);
+ inline XMLCh *getCurrentSchemaElemName();
+
// -----------------------------------------------------------------------
// Prefix map methods
// -----------------------------------------------------------------------
@@ -584,6 +595,26 @@
{
fStack[fStackTop-1]->fReferenceEscaped = true;
return;
+}
+
+inline void ElemStack::setCurrentSchemaElemName(const XMLCh * const schemaElemName)
+{
+ unsigned int schemaElemNameLen = XMLString::stringLen(schemaElemName);
+ unsigned int stackPos = fStackTop-1;
+
+ if(fStack[stackPos]->fSchemaElemNameMaxLen <= schemaElemNameLen)
+ {
+ XMLCh *tempStr = fStack[stackPos]->fSchemaElemName;
+ fStack[stackPos]->fSchemaElemNameMaxLen = schemaElemNameLen << 1;
+ fStack[stackPos]->fSchemaElemName = (XMLCh *)fMemoryManager->allocate((fStack[stackPos]->fSchemaElemNameMaxLen)*sizeof(XMLCh));
+ fMemoryManager->deallocate(tempStr);
+ }
+ XMLString::copyString(fStack[stackPos]->fSchemaElemName, schemaElemName);
+}
+
+inline XMLCh *ElemStack::getCurrentSchemaElemName()
+{
+ return fStack[fStackTop-1]->fSchemaElemName;
}
inline int ElemStack::getCurrentScope()
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org