You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jmeter-dev@jakarta.apache.org by se...@apache.org on 2003/11/26 23:50:14 UTC
cvs commit: jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser JTidyHTMLParser.java HtmlParserHTMLParser.java RegexpHTMLParser.java HTMLParser.java

sebb        2003/11/26 14:50:14

  Modified:    src/protocol/http/org/apache/jmeter/protocol/http/parser
                        JTidyHTMLParser.java HtmlParserHTMLParser.java
                        RegexpHTMLParser.java HTMLParser.java
  Log:
  Refactor parsers to store URLs in a Collection, and implement the Set in the parent class
  
  Revision  Changes    Path
  1.4       +17 -20    jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java
  
  Index: JTidyHTMLParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- JTidyHTMLParser.java	26 Nov 2003 02:47:12 -0000	1.3
  +++ JTidyHTMLParser.java	26 Nov 2003 22:50:14 -0000	1.4
  @@ -64,8 +64,6 @@
   import java.util.Arrays;
   import java.util.Collection;
   import java.util.Iterator;
  -import java.util.LinkedHashSet;
  -//import java.util.Set;
   
   import junit.framework.TestCase;
   
  @@ -98,10 +96,9 @@
       /* (non-Javadoc)
        * @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL)
        */
  -    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
  +    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
           throws HTMLParseException
       {
  -        LinkedHashSet uniqueURLs= new LinkedHashSet();
   		Document dom = null;
   		try
   		{
  @@ -114,18 +111,18 @@
           
   		// Now parse the DOM tree
   		
  -		scanNodes(dom,uniqueURLs, baseUrl);
  +		scanNodes(dom,urls, baseUrl);
   
  -		return uniqueURLs.iterator();
  +		return urls.iterator();
   	}
   
       /** 
   	 * Scan nodes recursively, looking for embedded resources
   	 * @param node - initial node
  -	 * @param uniqueURLs - container for URLs
  +	 * @param urls - container for URLs
   	 * @param baseUrl - used to create absolute URLs
   	 */
  -	private void scanNodes(Node node, Collection uniqueURLs, URL baseUrl)
  +	private void scanNodes(Node node, Collection urls, URL baseUrl)
   	{
   		if ( node == null ) {
   		  return;
  @@ -138,7 +135,7 @@
   	    switch ( type ) {
   
   	    case Node.DOCUMENT_NODE:
  -		  scanNodes(((Document)node).getDocumentElement(),uniqueURLs,baseUrl);
  +		  scanNodes(((Document)node).getDocumentElement(),urls,baseUrl);
   		  break;
   
   	    case Node.ELEMENT_NODE:
  @@ -160,13 +157,13 @@
   		  
   		  if (name.equalsIgnoreCase("img"))
   		  {
  -		  	addURL(uniqueURLs,getValue(attrs,"src"),baseUrl);
  +		  	addURL(urls,getValue(attrs,"src"),baseUrl);
   			break;
             }
             
   		  if (name.equalsIgnoreCase("applet"))
   		  {
  -		  	addURL(uniqueURLs,getValue(attrs,"code"),baseUrl);
  +		  	addURL(urls,getValue(attrs,"code"),baseUrl);
   			  break;
   			}
   			if (name.equalsIgnoreCase("input"))
  @@ -174,18 +171,18 @@
   				String src=getValue(attrs,"src");
   				String typ=getValue(attrs,"type");
   				if ((src!=null) &&(typ.equalsIgnoreCase("image")) ){ 
  -					addURL(uniqueURLs,src,baseUrl);
  +					addURL(urls,src,baseUrl);
   				}
   			  break;
   			}
   			if (name.equalsIgnoreCase("link"))
   			{
  -				addURL(uniqueURLs,getValue(attrs,"href"),baseUrl);
  +				addURL(urls,getValue(attrs,"href"),baseUrl);
   			  break;
   			}
   			String back=getValue(attrs,"background");
   			if (back != null){
  -				addURL(uniqueURLs,back,baseUrl);
  +				addURL(urls,back,baseUrl);
   				break;
   			}
   
  @@ -193,7 +190,7 @@
   		  if ( children != null ) {
   			 int len = children.getLength();
   			 for ( int i = 0; i < len; i++ ) {
  -				scanNodes(children.item(i),uniqueURLs,baseUrl);
  +				scanNodes(children.item(i),urls,baseUrl);
   			 }
   		  }
   		  break;
  @@ -221,23 +218,23 @@
   
       /*
        * Helper method to create and add a URL, if non-null
  -     * @param uniqueURLs - set
  +     * @param urls - set
        * @param url - may be null
        * @param baseUrl
        */
  -    private void addURL(Collection uniqueURLs, String url, URL baseUrl)
  +    private void addURL(Collection urls, String url, URL baseUrl)
       {
       	if (url == null) return;
       	boolean b=false;
   		try
   		{
  -			b=uniqueURLs.add(new URL(baseUrl, url));
  +			b=urls.add(new URL(baseUrl, url));
   		}
   		catch(MalformedURLException mfue)
   		{
   			// Can't build the URL. May be a site error: return
   			// the string.
  -			b=uniqueURLs.add(url);
  +			b=urls.add(url);
   		}
   		if (b) {
   			log.debug("Added   "+url);
  
  
  
  1.3       +6 -10     jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java
  
  Index: HtmlParserHTMLParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- HtmlParserHTMLParser.java	25 Nov 2003 22:18:58 -0000	1.2
  +++ HtmlParserHTMLParser.java	26 Nov 2003 22:50:14 -0000	1.3
  @@ -61,9 +61,8 @@
   import java.io.StringReader;
   import java.net.MalformedURLException;
   import java.net.URL;
  +import java.util.Collection;
   import java.util.Iterator;
  -import java.util.LinkedHashSet;
  -import java.util.Set;
   
   import junit.framework.TestCase;
   
  @@ -86,7 +85,7 @@
       /* (non-Javadoc)
        * @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
        */
  -    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
  +    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
           throws HTMLParseException
       {
           Parser htmlParser= null;
  @@ -105,9 +104,6 @@
   
           // Now parse the DOM tree
   
  -        // This is used to ignore duplicated binary files.
  -        Set uniqueURLs= new LinkedHashSet();
  -
           // look for applets
   
           // This will only work with an Applet .class file.
  @@ -169,13 +165,13 @@
   
                   try
                   {
  -                    uniqueURLs.add(new URL(baseUrl, binUrlStr));
  +                    urls.add(new URL(baseUrl, binUrlStr));
                   }
                   catch (MalformedURLException mfue)
                   {
                       // Can't build the URL? May be a site error: return the
                       // string.
  -                    uniqueURLs.add(binUrlStr);
  +                    urls.add(binUrlStr);
                   }
               }
               log.debug("End   : NewHTTPSamplerFull parseNodes");
  @@ -184,7 +180,7 @@
           {
           }
   
  -        return uniqueURLs.iterator();
  +        return urls.iterator();
       }
   
       /**
  
  
  
  1.4       +6 -13     jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java
  
  Index: RegexpHTMLParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- RegexpHTMLParser.java	26 Nov 2003 01:04:14 -0000	1.3
  +++ RegexpHTMLParser.java	26 Nov 2003 22:50:14 -0000	1.4
  @@ -59,8 +59,7 @@
   
   import java.net.MalformedURLException;
   import java.net.URL;
  -import java.util.Set;
  -import java.util.LinkedHashSet;
  +import java.util.Collection;
   import java.util.Iterator;
   
   import junit.framework.TestCase;
  @@ -190,14 +189,8 @@
       /* (non-Javadoc)
        * @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
        */
  -    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
  +    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
       {
  -        // This is used to ignore duplicated binary files.
  -        // Using a LinkedHashSet to avoid unnecessary overhead in iterating
  -        // the elements in the set later on. As a side-effect, this will keep
  -        // them roughly in order, which should be a better model of browser
  -        // behaviour.
  -        Set uniqueURLs= new LinkedHashSet();
   
           Perl5Matcher matcher= (Perl5Matcher)localMatcher.get();
           PatternMatcherInput input= (PatternMatcherInput)localInput.get();
  @@ -249,7 +242,7 @@
                   {
                       try
                       {
  -                        uniqueURLs.add(new URL(baseUrl, s));
  +                        urls.add(new URL(baseUrl, s));
                       }
                       catch (MalformedURLException e)
                       {
  @@ -263,12 +256,12 @@
                                       + " in page "
                                       + baseUrl);
                           }
  -                        uniqueURLs.add(s);
  +                        urls.add(s);
                       }
                   }
               }
           }
  -        return uniqueURLs.iterator();
  +        return urls.iterator();
       }
       
       public static class Test extends TestCase
  
  
  
  1.4       +33 -3     jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java
  
  Index: HTMLParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- HTMLParser.java	26 Nov 2003 02:47:42 -0000	1.3
  +++ HTMLParser.java	26 Nov 2003 22:50:14 -0000	1.4
  @@ -61,7 +61,9 @@
   import java.io.FileInputStream;
   import java.net.URL;
   import java.util.Arrays;
  +import java.util.Collection;
   import java.util.Iterator;
  +import java.util.LinkedHashSet;
   
   import junit.framework.TestCase;
   
  @@ -145,8 +147,36 @@
        * @param url  Base URL from which the HTML code was obtained
        * @return an Iterator for the resource URLs 
        */
  -    public abstract Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
  -        throws HTMLParseException;
  +    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
  +        throws HTMLParseException
  +        {    
  +        	// The Set is used to ignore duplicated binary files.
  +			// Using a LinkedHashSet to avoid unnecessary overhead in iterating
  +			// the elements in the set later on. As a side-effect, this will keep
  +			// them roughly in order, which should be a better model of browser
  +			// behaviour.
  +        	return getEmbeddedResourceURLs(html, baseUrl,new LinkedHashSet());
  +        }
  +
  +	/**
  +	 * Get the URLs for all the resources that a browser would automatically
  +	 * download following the download of the HTML content, that is: images,
  +	 * stylesheets, javascript files, applets, etc...
  +	 * <p>
  +	 * All URLs should be added to the Collection.
  +	 * <p>
  +	 * Malformed URLs can be reported to the caller by having the Iterator
  +	 * return the corresponding URL String. Overall problems parsing the html
  +	 * should be reported by throwing an HTMLParseException. 
  +	 * 
  +	 * @param html HTML code
  +	 * @param baseUrl  Base URL from which the HTML code was obtained
  +	 * @param coll Collection
  +	 * @return an Iterator for the resource URLs 
  +	 */
  +	public abstract Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl,
  +	                                                  Collection coll)
  +		throws HTMLParseException;
   
       public static class HTMLParserTest extends TestCase
       {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: jmeter-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jmeter-dev-help@jakarta.apache.org