You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2009/09/04 16:08:17 UTC
svn commit: r811420 - in /xerces/c/trunk: src/xercesc/dom/DOMLSParser.hpp
src/xercesc/parsers/DOMLSParserImpl.cpp
src/xercesc/parsers/DOMLSParserImpl.hpp tests/src/DOM/DOMTest/DTest.cpp
Author: amassari
Date: Fri Sep 4 14:08:16 2009
New Revision: 811420
URL: http://svn.apache.org/viewvc?rev=811420&view=rev
Log:
Improve support for DOMLSParserFilter:
- if startElement() returned REJECT it was treated like SKIP, and the current parent node was moved up one level too many
- the acceptNode() callback was invoked on a text node every time some data was added to it, potentially removing the node many times
- the nodes rejected by the callback were not recycled, leading to the same memory footprint as if they had been accepted
Modified:
xerces/c/trunk/src/xercesc/dom/DOMLSParser.hpp
xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.cpp
xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.hpp
xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp
Modified: xerces/c/trunk/src/xercesc/dom/DOMLSParser.hpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/dom/DOMLSParser.hpp?rev=811420&r1=811419&r2=811420&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/dom/DOMLSParser.hpp (original)
+++ xerces/c/trunk/src/xercesc/dom/DOMLSParser.hpp Fri Sep 4 14:08:16 2009
@@ -24,6 +24,7 @@
#define XERCESC_INCLUDE_GUARD_DOMLSPARSER_HPP
#include <xercesc/dom/DOMConfiguration.hpp>
+#include <xercesc/dom/DOMLSParserFilter.hpp>
#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/validators/common/Grammar.hpp>
@@ -32,7 +33,6 @@
class DOMErrorHandler;
class DOMLSInput;
-class DOMLSParserFilter;
class DOMNode;
class DOMDocument;
Modified: xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.cpp?rev=811420&r1=811419&r2=811420&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.cpp (original)
+++ xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.cpp Fri Sep 4 14:08:16 2009
@@ -83,6 +83,8 @@
, fCharsetOverridesXMLEncoding(true)
, fUserAdoptsDocument(false)
, fSupportedParameters(0)
+, fFilterAction(0)
+, fFilterDelayedTextNodes(0)
{
// dom spec has different default from scanner's default, so set explicitly
getScanner()->setNormalizeData(false);
@@ -144,6 +146,8 @@
DOMLSParserImpl::~DOMLSParserImpl()
{
delete fSupportedParameters;
+ delete fFilterAction;
+ delete fFilterDelayedTextNodes;
}
@@ -723,6 +727,10 @@
// remove the abort filter, if present
if(fFilter==&g_AbortFilter)
fFilter=0;
+ if(fFilterAction)
+ fFilterAction->removeAll();
+ if(fFilterDelayedTextNodes)
+ fFilterDelayedTextNodes->removeAll();
Wrapper4DOMLSInput isWrapper((DOMLSInput*)source, fEntityResolver, false, getMemoryManager());
@@ -744,6 +752,10 @@
// remove the abort filter, if present
if(fFilter==&g_AbortFilter)
fFilter=0;
+ if(fFilterAction)
+ fFilterAction->removeAll();
+ if(fFilterDelayedTextNodes)
+ fFilterDelayedTextNodes->removeAll();
AbstractDOMParser::parse(systemId);
if(getErrorCount()!=0)
@@ -763,6 +775,10 @@
// remove the abort filter, if present
if(fFilter==&g_AbortFilter)
fFilter=0;
+ if(fFilterAction)
+ fFilterAction->removeAll();
+ if(fFilterDelayedTextNodes)
+ fFilterDelayedTextNodes->removeAll();
AbstractDOMParser::parse(systemId);
if(getErrorCount()!=0)
@@ -781,6 +797,15 @@
if (getParseInProgress())
throw DOMException(DOMException::INVALID_STATE_ERR, XMLDOMMsg::LSParser_ParseInProgress, fMemoryManager);
+ // remove the abort filter, if present
+ if(fFilter==&g_AbortFilter)
+ fFilter=0;
+ if(fFilterAction)
+ fFilterAction->removeAll();
+ if(fFilterDelayedTextNodes)
+ fFilterDelayedTextNodes->removeAll();
+
+ // TODO
throw DOMException(DOMException::NOT_SUPPORTED_ERR, 0, getMemoryManager());
}
@@ -988,6 +1013,28 @@
return getScanner()->getSrcOffset();
}
+void DOMLSParserImpl::applyFilter(DOMNode* node)
+{
+ DOMLSParserFilter::FilterAction action;
+ // if the parent was already rejected, reject this too
+ if(fFilterAction && fFilterAction->containsKey(fCurrentParent) && fFilterAction->get(fCurrentParent)==DOMLSParserFilter::FILTER_REJECT)
+ action = DOMLSParserFilter::FILTER_REJECT;
+ else
+ action = fFilter->acceptNode(node);
+
+ switch(action)
+ {
+ case DOMLSParserFilter::FILTER_ACCEPT: break;
+ case DOMLSParserFilter::FILTER_REJECT:
+ case DOMLSParserFilter::FILTER_SKIP: if(node==fCurrentNode)
+ fCurrentNode = (node->getPreviousSibling()?node->getPreviousSibling():fCurrentParent);
+ fCurrentParent->removeChild(node);
+ node->release();
+ break;
+ case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
+ }
+}
+
void DOMLSParserImpl::docCharacters(const XMLCh* const chars
, const XMLSize_t length
, const bool cdataSection)
@@ -995,70 +1042,89 @@
AbstractDOMParser::docCharacters(chars, length, cdataSection);
if(fFilter)
{
+ // send the notification for the previous text node
+ if(fFilterDelayedTextNodes && fCurrentNode->getPreviousSibling() && fFilterDelayedTextNodes->containsKey(fCurrentNode->getPreviousSibling()))
+ {
+ DOMNode* textNode = fCurrentNode->getPreviousSibling();
+ fFilterDelayedTextNodes->removeKey(textNode);
+ applyFilter(textNode);
+ }
DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow();
- if(cdataSection && (whatToShow & DOMNodeFilter::SHOW_CDATA_SECTION) ||
- !cdataSection && (whatToShow & DOMNodeFilter::SHOW_TEXT))
+ if(cdataSection && (whatToShow & DOMNodeFilter::SHOW_CDATA_SECTION))
{
- DOMLSParserFilter::FilterAction action =
- fFilter->acceptNode(fCurrentNode);
-
- switch(action)
- {
- case DOMLSParserFilter::FILTER_ACCEPT: break;
- case DOMLSParserFilter::FILTER_REJECT:
- case DOMLSParserFilter::FILTER_SKIP: fCurrentParent->removeChild(fCurrentNode);
- break;
- case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
- }
+ applyFilter(fCurrentNode);
+ }
+ else if(!cdataSection && (whatToShow & DOMNodeFilter::SHOW_TEXT))
+ {
+ if(fFilterDelayedTextNodes==0)
+ fFilterDelayedTextNodes=new (fMemoryManager) ValueHashTableOf<bool, PtrHasher>(7, fMemoryManager);
+ fFilterDelayedTextNodes->put(fCurrentNode, true);
}
}
}
void DOMLSParserImpl::docComment(const XMLCh* const comment)
{
+ if(fFilter)
+ {
+ // send the notification for the previous text node
+ if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode))
+ {
+ fFilterDelayedTextNodes->removeKey(fCurrentNode);
+ applyFilter(fCurrentNode);
+ }
+ }
+
AbstractDOMParser::docComment(comment);
if(fFilter)
{
DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow();
if(whatToShow & DOMNodeFilter::SHOW_COMMENT)
- {
- DOMLSParserFilter::FilterAction action =
- fFilter->acceptNode(fCurrentNode);
-
- switch(action)
- {
- case DOMLSParserFilter::FILTER_ACCEPT: break;
- case DOMLSParserFilter::FILTER_REJECT:
- case DOMLSParserFilter::FILTER_SKIP: fCurrentParent->removeChild(fCurrentNode);
- break;
- case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
- }
- }
+ applyFilter(fCurrentNode);
}
}
void DOMLSParserImpl::docPI(const XMLCh* const target
, const XMLCh* const data)
{
+ if(fFilter)
+ {
+ // send the notification for the previous text node
+ if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode))
+ {
+ fFilterDelayedTextNodes->removeKey(fCurrentNode);
+ applyFilter(fCurrentNode);
+ }
+ }
+
AbstractDOMParser::docPI(target, data);
if(fFilter)
{
DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow();
if(whatToShow & DOMNodeFilter::SHOW_PROCESSING_INSTRUCTION)
- {
- DOMLSParserFilter::FilterAction action =
- fFilter->acceptNode(fCurrentNode);
+ applyFilter(fCurrentNode);
+ }
+}
- switch(action)
- {
- case DOMLSParserFilter::FILTER_ACCEPT: break;
- case DOMLSParserFilter::FILTER_REJECT:
- case DOMLSParserFilter::FILTER_SKIP: fCurrentParent->removeChild(fCurrentNode);
- break;
- case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
- }
+void DOMLSParserImpl::startEntityReference(const XMLEntityDecl& entDecl)
+{
+ if(fCreateEntityReferenceNodes && fFilter)
+ {
+ // send the notification for the previous text node
+ if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode))
+ {
+ fFilterDelayedTextNodes->removeKey(fCurrentNode);
+ applyFilter(fCurrentNode);
}
}
+
+ DOMNode* origParent = fCurrentParent;
+ AbstractDOMParser::startEntityReference(entDecl);
+ if (fCreateEntityReferenceNodes && fFilter)
+ {
+ if(fFilterAction && fFilterAction->containsKey(origParent) && fFilterAction->get(origParent)==DOMLSParserFilter::FILTER_REJECT)
+ fFilterAction->put(fCurrentNode, DOMLSParserFilter::FILTER_REJECT);
+ }
}
void DOMLSParserImpl::endElement(const XMLElementDecl& elemDecl
@@ -1066,31 +1132,49 @@
, const bool isRoot
, const XMLCh* const elemPrefix)
{
- DOMNode* origParent=fCurrentParent;
- DOMNode* origNode=fCurrentNode;
+ if(fFilter)
+ {
+ // send the notification for the previous text node
+ if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode))
+ {
+ fFilterDelayedTextNodes->removeKey(fCurrentNode);
+ applyFilter(fCurrentNode);
+ }
+ }
+
AbstractDOMParser::endElement(elemDecl, urlId, isRoot, elemPrefix);
if(fFilter)
{
DOMNodeFilter::ShowType whatToShow=fFilter->getWhatToShow();
if(whatToShow & DOMNodeFilter::SHOW_ELEMENT)
{
- DOMLSParserFilter::FilterAction action =
- fFilter->acceptNode(origNode);
-
+ DOMNode* thisNode = fCurrentNode;
+ DOMLSParserFilter::FilterAction action;
+ if(fFilterAction && fFilterAction->containsKey(thisNode))
+ {
+ action = fFilterAction->get(thisNode);
+ fFilterAction->removeKey(thisNode);
+ }
+ else
+ action = fFilter->acceptNode(thisNode);
switch(action)
{
case DOMLSParserFilter::FILTER_ACCEPT: break;
- case DOMLSParserFilter::FILTER_REJECT: origParent->removeChild(origNode);
+ case DOMLSParserFilter::FILTER_REJECT: fCurrentNode = (thisNode->getPreviousSibling()?thisNode->getPreviousSibling():fCurrentParent);
+ fCurrentParent->removeChild(thisNode);
+ thisNode->release();
break;
case DOMLSParserFilter::FILTER_SKIP: {
- DOMNode* child=origNode->getFirstChild();
+ DOMNode* child=thisNode->getFirstChild();
while(child)
{
DOMNode* next=child->getNextSibling();
- origParent->appendChild(child);
+ fCurrentParent->appendChild(child);
child=next;
}
- origParent->removeChild(origNode);
+ fCurrentNode = (thisNode->getPreviousSibling()?thisNode->getPreviousSibling():fCurrentParent);
+ fCurrentParent->removeChild(thisNode);
+ thisNode->release();
}
break;
case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
@@ -1107,21 +1191,37 @@
, const bool isEmpty
, const bool isRoot)
{
- AbstractDOMParser::startElement(elemDecl, urlId, elemPrefix, attrList, attrCount, false, isRoot);
if(fFilter)
{
- DOMLSParserFilter::FilterAction action =
- fFilter->startElement((DOMElement*)fCurrentNode);
+ // send the notification for the previous text node
+ if(fFilterDelayedTextNodes && fFilterDelayedTextNodes->containsKey(fCurrentNode))
+ {
+ fFilterDelayedTextNodes->removeKey(fCurrentNode);
+ applyFilter(fCurrentNode);
+ }
+ }
- switch(action)
+ DOMNode* origParent = fCurrentParent;
+ AbstractDOMParser::startElement(elemDecl, urlId, elemPrefix, attrList, attrCount, false, isRoot);
+ if(fFilter)
+ {
+ // if the parent was already rejected, reject this too
+ if(fFilterAction && fFilterAction->containsKey(origParent) && fFilterAction->get(origParent)==DOMLSParserFilter::FILTER_REJECT)
+ fFilterAction->put(fCurrentNode, DOMLSParserFilter::FILTER_REJECT);
+ else
{
- case DOMLSParserFilter::FILTER_ACCEPT: break;
- case DOMLSParserFilter::FILTER_REJECT: // TODO: reject also the children
- case DOMLSParserFilter::FILTER_SKIP: fCurrentParent=fCurrentNode->getParentNode();
- fCurrentParent->removeChild(fCurrentNode);
- fCurrentNode=fCurrentParent;
- break;
- case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
+ DOMLSParserFilter::FilterAction action = fFilter->startElement((DOMElement*)fCurrentNode);
+
+ switch(action)
+ {
+ case DOMLSParserFilter::FILTER_ACCEPT: break;
+ case DOMLSParserFilter::FILTER_REJECT:
+ case DOMLSParserFilter::FILTER_SKIP: if(fFilterAction==0)
+ fFilterAction=new (fMemoryManager) ValueHashTableOf<DOMLSParserFilter::FilterAction, PtrHasher>(7, fMemoryManager);
+ fFilterAction->put(fCurrentNode, action);
+ break;
+ case DOMLSParserFilter::FILTER_INTERRUPT: throw DOMLSException(DOMLSException::PARSE_ERR, XMLDOMMsg::LSParser_ParsingAborted, fMemoryManager);
+ }
}
}
if(isEmpty)
Modified: xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.hpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.hpp?rev=811420&r1=811419&r2=811420&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.hpp (original)
+++ xerces/c/trunk/src/xercesc/parsers/DOMLSParserImpl.hpp Fri Sep 4 14:08:16 2009
@@ -29,6 +29,7 @@
#include <xercesc/dom/DOMConfiguration.hpp>
#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/RefVectorOf.hpp>
+#include <xercesc/util/ValueHashTableOf.hpp>
XERCES_CPP_NAMESPACE_BEGIN
@@ -555,6 +556,10 @@
const XMLCh* const target
, const XMLCh* const data
);
+ virtual void startEntityReference
+ (
+ const XMLEntityDecl& entDecl
+ );
virtual void endElement
(
const XMLElementDecl& elemDecl
@@ -581,6 +586,11 @@
void resetParse();
// -----------------------------------------------------------------------
+ // Helper methods
+ // -----------------------------------------------------------------------
+ void applyFilter(DOMNode* node);
+
+ // -----------------------------------------------------------------------
// Private data members
//
// fEntityResolver
@@ -607,6 +617,16 @@
// A list of the parameters that can be set, including the ones
// specific of Xerces
//
+ // fFilterAction
+ // A map of elements rejected by the DOMLSParserFilter::startElement
+ // callback, used to avoid invoking DOMLSParserFilter::acceptNode
+ // on its children
+ //
+ // fFilterDelayedTextNodes
+ // As text nodes are filled incrementally, store them in a map
+ // so that we ask DOMLSParserFilter::acceptNode only once, when it
+ // is completely created
+ //
//-----------------------------------------------------------------------
DOMLSResourceResolver* fEntityResolver;
XMLEntityResolver* fXMLEntityResolver;
@@ -615,6 +635,8 @@
bool fCharsetOverridesXMLEncoding;
bool fUserAdoptsDocument;
DOMStringListImpl* fSupportedParameters;
+ ValueHashTableOf<DOMLSParserFilter::FilterAction, PtrHasher>* fFilterAction;
+ ValueHashTableOf<bool, PtrHasher>* fFilterDelayedTextNodes;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
Modified: xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp?rev=811420&r1=811419&r2=811420&view=diff
==============================================================================
--- xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp (original)
+++ xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp Fri Sep 4 14:08:16 2009
@@ -4915,7 +4915,28 @@
DOMLSInput* m_input;
};
+class ParserSkipper : public DOMLSParserFilter
+{
+public:
+ ParserSkipper() : fCallbackCalls(0) { }
+
+ virtual FilterAction acceptNode(DOMNode* node) { fCallbackCalls++; return DOMLSParserFilter::FILTER_ACCEPT;}
+ virtual FilterAction startElement(DOMElement* node)
+ {
+ XMLCh elem[]={chLatin_e, chLatin_l, chLatin_e, chLatin_m, chNull };
+ if(XMLString::equals(node->getNodeName(), elem))
+ return DOMLSParserFilter::FILTER_REJECT;
+ else
+ return DOMLSParserFilter::FILTER_ACCEPT;
+ }
+ virtual DOMNodeFilter::ShowType getWhatToShow() const { return DOMNodeFilter::SHOW_ALL; }
+
+ unsigned int fCallbackCalls;
+};
+
bool DOMTest::testLSExceptions() {
+ bool OK = true;
+
const char* sXml="<?xml version='1.0'?>"
"<!DOCTYPE root["
"<!ENTITY ent1 'Dallas. &ent3; #5668'>"
@@ -4929,7 +4950,6 @@
"<elem>Home </elem>"
"<elem>Test: &ent5;</elem>"
"</root>";
- MemBufInputSource is((XMLByte*)sXml, strlen(sXml), "bufId");
static const XMLCh gLS[] = { chLatin_L, chLatin_S, chNull };
DOMImplementationLS *impl = (DOMImplementationLS*)DOMImplementationRegistry::getDOMImplementation(gLS);
@@ -4944,14 +4964,14 @@
DOMDocument* doc=domBuilder->parse(input);
fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
- return false;
+ OK=false;
}
catch(DOMLSException& e)
{
if(e.code!=DOMLSException::PARSE_ERR)
{
fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
- return false;
+ OK=false;
}
}
@@ -4962,20 +4982,70 @@
DOMDocument* doc=domBuilder->parse(input);
fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
- return false;
+ OK=false;
}
catch(DOMException& e)
{
if(e.code!=DOMException::INVALID_STATE_ERR)
{
fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
- return false;
+ OK=false;
}
}
+
+ try
+ {
+ ParserSkipper skipper;
+ domBuilder->setFilter(&skipper);
+ domBuilder->getDomConfig()->setParameter(XMLUni::fgDOMEntities, false);
+ DOMDocument* doc=domBuilder->parse(input);
+
+ // verify that we get only 3 calls: for the text node, the CDATA section and the root element
+ if(doc==NULL || doc->getDocumentElement()==NULL || doc->getDocumentElement()->getChildElementCount()!=0 || skipper.fCallbackCalls!=3)
+ {
+ fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
+ OK=false;
+ }
+ }
+ catch(DOMException&)
+ {
+ fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
+ OK=false;
+ }
+
+ // this XML should trigger reuse of DOMElement
+ const char* sXml2="<?xml version='1.0'?>"
+ "<root>"
+ "<elem>Home</elem>"
+ "<elem2>Test</elem2>"
+ "<elem>Home</elem>"
+ "<elem2>Test</elem2>"
+ "</root>";
+ XMLString::transcode(sXml2, tempStr, 3999);
+ input->setStringData(tempStr);
+ try
+ {
+ ParserSkipper skipper;
+ domBuilder->setFilter(&skipper);
+ DOMDocument* doc=domBuilder->parse(input);
+
+ // verify that we get only 5 calls: for the root element, the two elem2 and the two text nodes under them
+ if(doc==NULL || doc->getDocumentElement()==NULL || doc->getDocumentElement()->getChildElementCount()!=2 || skipper.fCallbackCalls!=5)
+ {
+ fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
+ OK=false;
+ }
+ }
+ catch(DOMException&)
+ {
+ fprintf(stderr, "checking testLSExceptions failed at line %i\n", __LINE__);
+ OK=false;
+ }
+
input->release();
domBuilder->release();
- return true;
+ return OK;
}
bool DOMTest::testElementTraversal() {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org