You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jmeter-dev@jakarta.apache.org by se...@apache.org on 2003/11/26 23:50:14 UTC
cvs commit: jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser JTidyHTMLParser.java HtmlParserHTMLParser.java RegexpHTMLParser.java HTMLParser.java
sebb 2003/11/26 14:50:14
Modified: src/protocol/http/org/apache/jmeter/protocol/http/parser
JTidyHTMLParser.java HtmlParserHTMLParser.java
RegexpHTMLParser.java HTMLParser.java
Log:
Refactor parsers to store URLs in a Collection, and implement the Set in the parent class
Revision Changes Path
1.4 +17 -20 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java
Index: JTidyHTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- JTidyHTMLParser.java 26 Nov 2003 02:47:12 -0000 1.3
+++ JTidyHTMLParser.java 26 Nov 2003 22:50:14 -0000 1.4
@@ -64,8 +64,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
-import java.util.LinkedHashSet;
-//import java.util.Set;
import junit.framework.TestCase;
@@ -98,10 +96,9 @@
/* (non-Javadoc)
* @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL)
*/
- public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
throws HTMLParseException
{
- LinkedHashSet uniqueURLs= new LinkedHashSet();
Document dom = null;
try
{
@@ -114,18 +111,18 @@
// Now parse the DOM tree
- scanNodes(dom,uniqueURLs, baseUrl);
+ scanNodes(dom,urls, baseUrl);
- return uniqueURLs.iterator();
+ return urls.iterator();
}
/**
* Scan nodes recursively, looking for embedded resources
* @param node - initial node
- * @param uniqueURLs - container for URLs
+ * @param urls - container for URLs
* @param baseUrl - used to create absolute URLs
*/
- private void scanNodes(Node node, Collection uniqueURLs, URL baseUrl)
+ private void scanNodes(Node node, Collection urls, URL baseUrl)
{
if ( node == null ) {
return;
@@ -138,7 +135,7 @@
switch ( type ) {
case Node.DOCUMENT_NODE:
- scanNodes(((Document)node).getDocumentElement(),uniqueURLs,baseUrl);
+ scanNodes(((Document)node).getDocumentElement(),urls,baseUrl);
break;
case Node.ELEMENT_NODE:
@@ -160,13 +157,13 @@
if (name.equalsIgnoreCase("img"))
{
- addURL(uniqueURLs,getValue(attrs,"src"),baseUrl);
+ addURL(urls,getValue(attrs,"src"),baseUrl);
break;
}
if (name.equalsIgnoreCase("applet"))
{
- addURL(uniqueURLs,getValue(attrs,"code"),baseUrl);
+ addURL(urls,getValue(attrs,"code"),baseUrl);
break;
}
if (name.equalsIgnoreCase("input"))
@@ -174,18 +171,18 @@
String src=getValue(attrs,"src");
String typ=getValue(attrs,"type");
if ((src!=null) &&(typ.equalsIgnoreCase("image")) ){
- addURL(uniqueURLs,src,baseUrl);
+ addURL(urls,src,baseUrl);
}
break;
}
if (name.equalsIgnoreCase("link"))
{
- addURL(uniqueURLs,getValue(attrs,"href"),baseUrl);
+ addURL(urls,getValue(attrs,"href"),baseUrl);
break;
}
String back=getValue(attrs,"background");
if (back != null){
- addURL(uniqueURLs,back,baseUrl);
+ addURL(urls,back,baseUrl);
break;
}
@@ -193,7 +190,7 @@
if ( children != null ) {
int len = children.getLength();
for ( int i = 0; i < len; i++ ) {
- scanNodes(children.item(i),uniqueURLs,baseUrl);
+ scanNodes(children.item(i),urls,baseUrl);
}
}
break;
@@ -221,23 +218,23 @@
/*
* Helper method to create and add a URL, if non-null
- * @param uniqueURLs - set
+ * @param urls - collection to which the URL is added
* @param url - may be null
* @param baseUrl
*/
- private void addURL(Collection uniqueURLs, String url, URL baseUrl)
+ private void addURL(Collection urls, String url, URL baseUrl)
{
if (url == null) return;
boolean b=false;
try
{
- b=uniqueURLs.add(new URL(baseUrl, url));
+ b=urls.add(new URL(baseUrl, url));
}
catch(MalformedURLException mfue)
{
// Can't build the URL. May be a site error: return
// the string.
- b=uniqueURLs.add(url);
+ b=urls.add(url);
}
if (b) {
log.debug("Added "+url);
1.3 +6 -10 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java
Index: HtmlParserHTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- HtmlParserHTMLParser.java 25 Nov 2003 22:18:58 -0000 1.2
+++ HtmlParserHTMLParser.java 26 Nov 2003 22:50:14 -0000 1.3
@@ -61,9 +61,8 @@
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Collection;
import java.util.Iterator;
-import java.util.LinkedHashSet;
-import java.util.Set;
import junit.framework.TestCase;
@@ -86,7 +85,7 @@
/* (non-Javadoc)
* @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
*/
- public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
throws HTMLParseException
{
Parser htmlParser= null;
@@ -105,9 +104,6 @@
// Now parse the DOM tree
- // This is used to ignore duplicated binary files.
- Set uniqueURLs= new LinkedHashSet();
-
// look for applets
// This will only work with an Applet .class file.
@@ -169,13 +165,13 @@
try
{
- uniqueURLs.add(new URL(baseUrl, binUrlStr));
+ urls.add(new URL(baseUrl, binUrlStr));
}
catch (MalformedURLException mfue)
{
// Can't build the URL? May be a site error: return the
// string.
- uniqueURLs.add(binUrlStr);
+ urls.add(binUrlStr);
}
}
log.debug("End : NewHTTPSamplerFull parseNodes");
@@ -184,7 +180,7 @@
{
}
- return uniqueURLs.iterator();
+ return urls.iterator();
}
/**
1.4 +6 -13 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java
Index: RegexpHTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- RegexpHTMLParser.java 26 Nov 2003 01:04:14 -0000 1.3
+++ RegexpHTMLParser.java 26 Nov 2003 22:50:14 -0000 1.4
@@ -59,8 +59,7 @@
import java.net.MalformedURLException;
import java.net.URL;
-import java.util.Set;
-import java.util.LinkedHashSet;
+import java.util.Collection;
import java.util.Iterator;
import junit.framework.TestCase;
@@ -190,14 +189,8 @@
/* (non-Javadoc)
* @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
*/
- public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
{
- // This is used to ignore duplicated binary files.
- // Using a LinkedHashSet to avoid unnecessary overhead in iterating
- // the elements in the set later on. As a side-effect, this will keep
- // them roughly in order, which should be a better model of browser
- // behaviour.
- Set uniqueURLs= new LinkedHashSet();
Perl5Matcher matcher= (Perl5Matcher)localMatcher.get();
PatternMatcherInput input= (PatternMatcherInput)localInput.get();
@@ -249,7 +242,7 @@
{
try
{
- uniqueURLs.add(new URL(baseUrl, s));
+ urls.add(new URL(baseUrl, s));
}
catch (MalformedURLException e)
{
@@ -263,12 +256,12 @@
+ " in page "
+ baseUrl);
}
- uniqueURLs.add(s);
+ urls.add(s);
}
}
}
}
- return uniqueURLs.iterator();
+ return urls.iterator();
}
public static class Test extends TestCase
1.4 +33 -3 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java
Index: HTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- HTMLParser.java 26 Nov 2003 02:47:42 -0000 1.3
+++ HTMLParser.java 26 Nov 2003 22:50:14 -0000 1.4
@@ -61,7 +61,9 @@
import java.io.FileInputStream;
import java.net.URL;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Iterator;
+import java.util.LinkedHashSet;
import junit.framework.TestCase;
@@ -145,8 +147,36 @@
* @param url Base URL from which the HTML code was obtained
* @return an Iterator for the resource URLs
*/
- public abstract Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
- throws HTMLParseException;
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
+ throws HTMLParseException
+ {
+ // The Set is used to ignore duplicated binary files.
+ // Using a LinkedHashSet to avoid unnecessary overhead in iterating
+ // the elements in the set later on. As a side-effect, this will keep
+ // them roughly in order, which should be a better model of browser
+ // behaviour.
+ return getEmbeddedResourceURLs(html, baseUrl,new LinkedHashSet());
+ }
+
+ /**
+ * Get the URLs for all the resources that a browser would automatically
+ * download following the download of the HTML content, that is: images,
+ * stylesheets, javascript files, applets, etc...
+ * <p>
+ * All URLs should be added to the Collection.
+ * <p>
+ * Malformed URLs can be reported to the caller by having the Iterator
+ * return the corresponding URL String. Overall problems parsing the html
+ * should be reported by throwing an HTMLParseException.
+ *
+ * @param html HTML code
+ * @param baseUrl Base URL from which the HTML code was obtained
+ * @param coll Collection to which the resource URLs are added
+ * @return an Iterator for the resource URLs
+ */
+ public abstract Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl,
+ Collection coll)
+ throws HTMLParseException;
public static class HTMLParserTest extends TestCase
{
---------------------------------------------------------------------
To unsubscribe, e-mail: jmeter-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jmeter-dev-help@jakarta.apache.org