You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-dev@xerces.apache.org by bu...@apache.org on 2001/04/26 20:10:39 UTC
[Bug 1541] New - TreeWalker with sparse filter works in exponential time

http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1541

*** shadow/1541	Thu Apr 26 11:10:39 2001
--- shadow/1541.tmp.13123	Thu Apr 26 11:10:39 2001
***************
*** 0 ****
--- 1,164 ----
+ +============================================================================+
+ | TreeWalker with sparse filter works in exponential time                    |
+ +----------------------------------------------------------------------------+
+ |        Bug #: 1541                        Product: Xerces-J                |
+ |       Status: NEW                         Version: 1.3.1                   |
+ |   Resolution:                            Platform: PC                      |
+ |     Severity: Major                    OS/Version:                         |
+ |     Priority:                           Component: DOM                     |
+ +----------------------------------------------------------------------------+
+ |  Assigned To: xerces-j-dev@xml.apache.org                                  |
+ |  Reported By: aroy@boostmyscore.com                                        |
+ |      CC list: Cc:                                                          |
+ +----------------------------------------------------------------------------+
+ |          URL:                                                              |
+ +============================================================================+
+ |                              DESCRIPTION                                   |
+ Even in very small documents, a TreeWalkerImpl created to find nodes of a 
+ specific name can take arbitrarily long amounts of time. Also, in larger 
+ documents, similar filters can cause StackOverflowError's.
+ 
+ I have included source for a small test file below that can be used to 
+ reproduce the bug. In it, an xml document is created and passed to the 
+ TreeWalker. A filter is created that matches only nodes with a given name. 
+ TreeWalker.nextNode() begins to take arbitrarily long amounts of time for even 
+ small documents (~60 seconds for a 3/3/2 (~20) node document). This only occurs 
+ when there is a low number of nodes that match the filter.
+ 
+ In addition, a wider filter (for example two tags) can cause a 
+ StackOverflowError on large documents (e.g. 14/13/11 (2000+) nodes).
+ 
+ Running the following code will result in the output:
+ >>
+ rootheader
+ null
+ approximately 68.098 seconds
+ >>
+ Changing the size of the document to 14/13/11, and setting the nodefilter to 
+ use FILTER_TAGS_B will result in the following output:
+ >>
+ rootheader
+ java.lang.StackOverflowError
+ >>
+ 
+ Code:
+ >>>>>
+ import org.w3c.dom.Document;
+ import org.w3c.dom.Node;
+ import org.w3c.dom.traversal.TreeWalker;
+ import org.apache.xerces.dom.TreeWalkerImpl;
+ import org.w3c.dom.traversal.NodeFilter;
+ import org.apache.xerces.parsers.DOMParser;
+ import org.xml.sax.InputSource;
+ import java.io.StringReader;
+ 
+ public class TestWalker {
+ 	// this many children of root node
+ 	private static final int NUM_BRANCH = 3;
+ 	// this many children of each branch node
+ 	private static final int NUM_TWIG = 3;
+ 	// this many children for each twig node
+ 	private static final int NUM_LEAF = 2;
+ 	private static final String[] FILTER_TAGS_A = {"rootheader"};
+ 	private static final String[] FILTER_TAGS_B = {"rootheader",
+ 										
+ 		 "rootfooter"};
+ 
+ 	public TestWalker() {
+ 		try {
+ 			runTest(parseXML(createXML()));
+ 		} catch (StackOverflowError e) {
+ 			System.out.println(e);
+ 		}
+ 	} // end constructor()
+ 
+ 	// Creates a TreeWalker with a simple name filter and loops
+ 	// through each node returned until the document is finished.
+ 	private void runTest(Document doc) {
+ 		// Create TreeWalker with inline filter.
+ 		// The filter simply looks for a node matching the filter node.
+ 		TreeWalker walker = new 
+ 			TreeWalkerImpl(doc, Node.ELEMENT_NODE,
+ 						   new NodeFilter() {
+ 								   protected 
+ String tag[] = FILTER_TAGS_A;
+ 								   
+ 								   public short 
+ acceptNode (Node n) {
+ 									   
+ String testName = n.getNodeName();
+ 									   for 
+ (int i=0; i < tag.length; i++) {
+ 									
+ 	   if (testName.equals(tag[i]))
+ 										
+ 	   return FILTER_ACCEPT;
+ 									   }
+ 									   
+ return FILTER_SKIP;
+ 								   } // end 
+ acceptNode(Node)
+ 								   
+ 							   }, true);
+ 		// Loop until done.
+ 		long startTime;
+ 		long endTime;
+ 		startTime = System.currentTimeMillis();
+ 		Node node = walker.nextNode();
+ 		while (node != null) {
+ 			System.out.println(node.getNodeName());
+ 			node = walker.nextNode();
+ 		}
+ 		endTime = System.currentTimeMillis();
+ 		System.out.println("null");
+ 		double runTime = (double)(endTime - startTime);
+ 		System.out.println("approximately " + (runTime/1000.0) + 
+ 						   " seconds");
+ 	} // end runTest()
+ 
+ 	// Parses given String and returns a Document
+ 	private Document parseXML(String xmlSrc) {
+ 		// Double check accuracy of created xml:
+ 		// System.out.println("Source:\n" + xmlSrc);
+ 		DOMParser parser = new DOMParser();
+ 		InputSource in = new InputSource(new StringReader(xmlSrc));
+ 		try { parser.parse(in);
+ 		} catch (Exception e) {e.printStackTrace();System.exit(1);}
+ 		return parser.getDocument();
+ 	} // end parseXML()
+ 
+ 	// Creates a simple Document based on global parameters.
+ 	// The root has NUM_BRANCH children, each has NUM_TWIG children,
+ 	// and each of those has NUM_LEAF children.
+ 	private String createXML() {
+ 		StringBuffer xmlSrc = new StringBuffer();
+ 		xmlSrc.append("<?xml version=\"1.0\"?>\n");
+ 		xmlSrc.append("<root><rootheader>Root</rootheader>\n");
+ 		for (int x=0; x < NUM_BRANCH; x++) {
+ 			xmlSrc.append
+ ("\t<branch><branchheader>Branch"+x+"</branchheader>\n");
+ 			xmlSrc.append
+ ("\t\t<resource>Resource"+x+"</resource>\n");
+ 			for (int y=0; y<NUM_TWIG; y++) {
+ 				xmlSrc.append
+ ("\t\t<twig><twigheader>Twig"+x+"."+y+
+ 							  "</twigheader>\n");
+ 				for (int z=0; z<NUM_LEAF; z++)
+ 					xmlSrc.append
+ ("\t\t\t<leaf>Leaf"+x+"."+y+"."+z+"</leaf>\n");
+ 				xmlSrc.append("\t\t</twig>\n");
+ 			}
+ 			xmlSrc.append("\t</branch>\n");
+ 		}
+ 		xmlSrc.append("<rootfooter>The End</rootfooter>\n");
+ 		xmlSrc.append("</root>");
+ 		return xmlSrc.toString();
+ 	} // end createXML()
+ 
+ 	// Usage: java TestWalker
+ 	public static void main(String[] args) {
+ 		TestWalker bob = new TestWalker();
+ 	} // end main
+ } // end class TestWalker
+ 
+ >>>>>

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-j-dev-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-j-dev-help@xml.apache.org