You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-dev@xerces.apache.org by bu...@apache.org on 2001/04/26 20:10:39 UTC
[Bug 1541] New - TreeWalker with sparse filter works in exponential time
http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1541
*** shadow/1541 Thu Apr 26 11:10:39 2001
--- shadow/1541.tmp.13123 Thu Apr 26 11:10:39 2001
***************
*** 0 ****
--- 1,164 ----
+ +============================================================================+
+ | TreeWalker with sparse filter works in exponential time |
+ +----------------------------------------------------------------------------+
+ | Bug #: 1541 Product: Xerces-J |
+ | Status: NEW Version: 1.3.1 |
+ | Resolution: Platform: PC |
+ | Severity: Major OS/Version: |
+ | Priority: Component: DOM |
+ +----------------------------------------------------------------------------+
+ | Assigned To: xerces-j-dev@xml.apache.org |
+ | Reported By: aroy@boostmyscore.com |
+ | CC list: Cc: |
+ +----------------------------------------------------------------------------+
+ | URL: |
+ +============================================================================+
+ | DESCRIPTION |
+ Even in very small documents, a TreeWalkerImpl created to find nodes of a
+ specific name can take arbitrarily long amounts of time. Also, in larger
+ documents, similar filters can cause StackOverflowError's.
+
+ I have included source for a small test file below that can be used to
+ reproduce the bug. In it, an xml document is created and passed to the
+ TreeWalker. A filter is created that matches only nodes with a given name.
+ TreeWalker.nextNode() begins to take arbitrarily long amounts of time for even
+ small documents (~60 seconds for a 3/3/2 (~20) node document). This only occurs
+ when there is a low number of nodes that match the filter.
+
+ In addition, a wider filter (for example two tags) can cause a
+ StackOverflowError on large documents (e.g. 14/13/11 (2000+) nodes).
+
+ Running the following code will result in the output:
+ >>
+ rootheader
+ null
+ approximately 68.098 seconds
+ >>
+ Changing the size of the document to 14/13/11, and setting the nodefilter to
+ use FILTER_TAGS_B will result in the following output:
+ >>
+ rootheader
+ java.lang.StackOverflowError
+ >>
+
+ Code:
+ >>>>>
+ import org.w3c.dom.Document;
+ import org.w3c.dom.Node;
+ import org.w3c.dom.traversal.TreeWalker;
+ import org.apache.xerces.dom.TreeWalkerImpl;
+ import org.w3c.dom.traversal.NodeFilter;
+ import org.apache.xerces.parsers.DOMParser;
+ import org.xml.sax.InputSource;
+ import java.io.StringReader;
+
+ public class TestWalker {
+ // this many children of root node
+ private static final int NUM_BRANCH = 3;
+ // this many children of each branch node
+ private static final int NUM_TWIG = 3;
+ // this many children for each twig node
+ private static final int NUM_LEAF = 2;
+ private static final String[] FILTER_TAGS_A = {"rootheader"};
+ private static final String[] FILTER_TAGS_B = {"rootheader",
+
+ "rootfooter"};
+
+ public TestWalker() {
+ try {
+ runTest(parseXML(createXML()));
+ } catch (StackOverflowError e) {
+ System.out.println(e);
+ }
+ } // end constructor()
+
+ // Creates a TreeWalker with a simple name filter and loops
+ // through each node returned until the document is finished.
+ private void runTest(Document doc) {
+ // Create TreeWalker with inline filter.
+ // The filter simply looks for a node matching the filter node.
+ TreeWalker walker = new
+ TreeWalkerImpl(doc, Node.ELEMENT_NODE,
+ new NodeFilter() {
+ protected
+ String tag[] = FILTER_TAGS_A;
+
+ public short
+ acceptNode (Node n) {
+
+ String testName = n.getNodeName();
+ for
+ (int i=0; i < tag.length; i++) {
+
+ if (testName.equals(tag[i]))
+
+ return FILTER_ACCEPT;
+ }
+
+ return FILTER_SKIP;
+ } // end
+ acceptNode(Node)
+
+ }, true);
+ // Loop until done.
+ long startTime;
+ long endTime;
+ startTime = System.currentTimeMillis();
+ Node node = walker.nextNode();
+ while (node != null) {
+ System.out.println(node.getNodeName());
+ node = walker.nextNode();
+ }
+ endTime = System.currentTimeMillis();
+ System.out.println("null");
+ double runTime = (double)(endTime - startTime);
+ System.out.println("approximately " + (runTime/1000.0) +
+ " seconds");
+ } // end runTest()
+
+ // Parses given String and returns a Document
+ private Document parseXML(String xmlSrc) {
+ // Double check accuracy of created xml:
+ // System.out.println("Source:\n" + xmlSrc);
+ DOMParser parser = new DOMParser();
+ InputSource in = new InputSource(new StringReader(xmlSrc));
+ try { parser.parse(in);
+ } catch (Exception e) {e.printStackTrace();System.exit(1);}
+ return parser.getDocument();
+ } // end parseXML()
+
+ // Creates a simple Document based on global parameters.
+ // The root has NUM_BRANCH children, each has NUM_TWIG children,
+ // and each of those has NUM_LEAF children.
+ private String createXML() {
+ StringBuffer xmlSrc = new StringBuffer();
+ xmlSrc.append("<?xml version=\"1.0\"?>\n");
+ xmlSrc.append("<root><rootheader>Root</rootheader>\n");
+ for (int x=0; x < NUM_BRANCH; x++) {
+ xmlSrc.append
+ ("\t<branch><branchheader>Branch"+x+"</branchheader>\n");
+ xmlSrc.append
+ ("\t\t<resource>Resource"+x+"</resource>\n");
+ for (int y=0; y<NUM_TWIG; y++) {
+ xmlSrc.append
+ ("\t\t<twig><twigheader>Twig"+x+"."+y+
+ "</twigheader>\n");
+ for (int z=0; z<NUM_LEAF; z++)
+ xmlSrc.append
+ ("\t\t\t<leaf>Leaf"+x+"."+y+"."+z+"</leaf>\n");
+ xmlSrc.append("\t\t</twig>\n");
+ }
+ xmlSrc.append("\t</branch>\n");
+ }
+ xmlSrc.append("<rootfooter>The End</rootfooter>\n");
+ xmlSrc.append("</root>");
+ return xmlSrc.toString();
+ } // end createXML()
+
+ // Usage: java TestWalker
+ public static void main(String[] args) {
+ TestWalker bob = new TestWalker();
+ } // end main
+ } // end class TestWalker
+
+ >>>>>
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-j-dev-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-j-dev-help@xml.apache.org