You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jmeter-dev@jakarta.apache.org by js...@apache.org on 2003/11/25 16:32:38 UTC
cvs commit: jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser JTidyHTMLParser.java HtmlParserHTMLParser.java HTMLParser.java HTMLParseException.java RegexpHTMLParser.java HtmlParsingUtils.java HtmlParser.java
jsalvata 2003/11/25 07:32:38
Modified: src/protocol/http/org/apache/jmeter/protocol/http/sampler
HTTPSamplerFull.java HTTPSampler.java
Added: src/protocol/http/org/apache/jmeter/protocol/http/parser
JTidyHTMLParser.java HtmlParserHTMLParser.java
HTMLParser.java HTMLParseException.java
RegexpHTMLParser.java HtmlParsingUtils.java
Removed: src/protocol/http/org/apache/jmeter/protocol/http/sampler
ParseJTidy.java ParseRegexp.java
ParseHtmlParser.java
src/protocol/http/org/apache/jmeter/protocol/http/parser
HtmlParser.java
Log:
Refactored HTMLParser code. Added test cases.
Revision Changes Path
1.20 +164 -102 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerFull.java
Index: HTTPSamplerFull.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerFull.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- HTTPSamplerFull.java 24 Nov 2003 01:11:40 -0000 1.19
+++ HTTPSamplerFull.java 25 Nov 2003 15:32:38 -0000 1.20
@@ -57,13 +57,15 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Iterator;
import junit.framework.TestCase;
+import org.apache.jmeter.protocol.http.parser.*;
import org.apache.jmeter.samplers.Entry;
import org.apache.jmeter.samplers.SampleResult;
-import org.apache.jmeter.util.JMeterUtils;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
@@ -109,12 +111,13 @@
*
* @author Khor Soon Hin
* @author <a href="mailto:mramshaw@alumni.concordia.ca">Martin Ramshaw</a>
+ * @author <a href="mailto:jsalvata@apache.org">Jordi Salvat i Alabart</a>
* @version $Id$
*/
public class HTTPSamplerFull
{
/** Used to store the Logger (used for debug and error messages). */
- transient private static Logger log = LoggingManager.getLoggerForClass();
+ transient private static Logger log= LoggingManager.getLoggerForClass();
/**
* Used to store the UTF encoding name (which is version dependent).
@@ -122,11 +125,6 @@
*/
protected static String utfEncodingName;
- private static boolean parseJTidy =
- JMeterUtils.getPropDefault("parser.jtidy",false);
- private static boolean parseRegexp =
- JMeterUtils.getPropDefault("parser.regexp",false);
-
/**
* This is the only Constructor.
*/
@@ -152,14 +150,14 @@
{
// Sample the container page.
log.debug("Start : HTTPSamplerFull sample");
- SampleResult res = sampler.sample(new Entry());
- if(log.isDebugEnabled())
+ SampleResult res= sampler.sample(new Entry());
+ if (log.isDebugEnabled())
{
log.debug("Main page loading time - " + res.getTime());
}
// Now parse the HTML page
- return parseForImages(res,sampler);
+ return downloadEmbeddedResources(res, sampler);
}
/**
@@ -167,20 +165,74 @@
* @param sampler - the HTTP sampler
* @return the sample result, with possible additional sub results
*/
- protected SampleResult parseForImages(SampleResult res, HTTPSampler sampler)
+ protected SampleResult downloadEmbeddedResources(
+ SampleResult res,
+ HTTPSampler sampler)
{
- if (parseJTidy){
- log.info("Using JTidy");
- return ParseJTidy.parseForImages(res,sampler);
- }
- else if (parseRegexp) {
- log.info("Using Regexp-based HTML parsing");
- return ParseRegexp.parseForImages(res, sampler);
- }
- else {
- log.info("Using HtmlParser");
- return ParseHtmlParser.parseForImages(res,sampler);
+ Iterator urls;
+ try
+ {
+ urls=
+ HTMLParser.getParser().getEmbeddedResourceURLs(
+ res.getResponseData(),
+ sampler.getUrl());
+ }
+ catch (MalformedURLException e)
+ {
+ // If we're downloading the resources dragged in by the HTML
+ // page, I can't see how the URL could be malformed!
+ log.error("Program error: can't get sampler URL", e);
+ throw new Error(e);
+ }
+ catch (HTMLParseException e)
+ {
+ res.setResponseData(e.toString().getBytes());
+ res.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
+ res.setResponseMessage(HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
+ res.setSuccessful(false);
+ return res;
+ }
+
+ // Iterate through the URLs and download each image:
+ while (urls.hasNext())
+ {
+ SampleResult binRes= new SampleResult();
+ Object url= urls.next();
+ binRes.setSampleLabel(url.toString());
+ try
+ {
+ HTTPSamplerFull.loadBinary((URL)url, binRes, sampler);
+ }
+ catch (ClassCastException e)
+ {
+ binRes.setResponseData(e.toString().getBytes());
+ binRes.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
+ binRes.setResponseMessage(
+ HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
+ binRes.setSuccessful(false);
+ continue;
+ }
+ catch (Exception ioe)
+ {
+ log.error("Error reading from URL - " + ioe);
+ binRes.setResponseData(ioe.toString().getBytes());
+ binRes.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
+ binRes.setResponseMessage(
+ HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
+ binRes.setSuccessful(false);
+ }
+
+ log.debug("Adding result");
+ res.addSubResult(binRes);
+ res.setTime(res.getTime() + binRes.getTime());
+ }
+
+ // Okay, we're all done now
+ if (log.isDebugEnabled())
+ {
+ log.debug("Total time - " + res.getTime());
}
+ return res;
}
/**
@@ -192,23 +244,26 @@
*
* @throws IOException indicates a problem reading from the URL
*/
- protected static byte[] loadBinary(URL url, SampleResult res, HTTPSampler sampler)
+ protected static byte[] loadBinary(
+ URL url,
+ SampleResult res,
+ HTTPSampler sampler)
throws Exception
{
log.debug("Start : loadBinary");
- byte[] ret = new byte[0];
+ byte[] ret= new byte[0];
res.setSamplerData(new HTTPSampler(url).toString());
HttpURLConnection conn;
try
{
- conn = sampler.setupConnection(url, HTTPSampler.GET,res);
+ conn= sampler.setupConnection(url, HTTPSampler.GET, res);
sampler.connect();
}
- catch(Exception ioe)
+ catch (Exception ioe)
{
// don't do anything 'cos presumably the connection will return the
// correct http response codes
- if(log.isDebugEnabled())
+ if (log.isDebugEnabled())
{
log.debug("loadBinary : error in setupConnection " + ioe);
}
@@ -217,19 +272,19 @@
try
{
- long time = System.currentTimeMillis();
- if(log.isDebugEnabled())
+ long time= System.currentTimeMillis();
+ if (log.isDebugEnabled())
{
log.debug("loadBinary : start time - " + time);
}
- int errorLevel = getErrorLevel(conn, res);
+ int errorLevel= getErrorLevel(conn, res);
if (errorLevel == 2)
{
- ret = sampler.readResponse(conn);
- res.setContentType(conn.getHeaderField("Content-type"));
+ ret= sampler.readResponse(conn);
+ res.setContentType(conn.getHeaderField("Content-type"));
res.setSuccessful(true);
- long endTime = System.currentTimeMillis();
- if(log.isDebugEnabled())
+ long endTime= System.currentTimeMillis();
+ if (log.isDebugEnabled())
{
log.debug("loadBinary : end time - " + endTime);
}
@@ -238,16 +293,15 @@
else
{
res.setSuccessful(false);
- int responseCode =
- ((HttpURLConnection)conn).getResponseCode();
- String responseMsg =
- ((HttpURLConnection)conn).getResponseMessage();
+ int responseCode= ((HttpURLConnection)conn).getResponseCode();
+ String responseMsg=
+ ((HttpURLConnection)conn).getResponseMessage();
log.error("loadBinary : failed code - " + responseCode);
log.error("loadBinary : failed message - " + responseMsg);
}
- if(log.isDebugEnabled())
+ if (log.isDebugEnabled())
{
- log.debug("loadBinary : binary - " + ret[0]+ret[1]);
+ log.debug("loadBinary : binary - " + ret[0] + ret[1]);
log.debug("loadBinary : loadTime - " + res.getTime());
}
log.debug("End : loadBinary");
@@ -266,7 +320,7 @@
// when its timeout period is reached.
sampler.disconnect(conn);
}
- catch(Exception e)
+ catch (Exception e)
{
}
}
@@ -280,33 +334,35 @@
* @param res where all results of sampling will be stored
* @return HTTP response code divided by 100
*/
- protected static int getErrorLevel(HttpURLConnection conn, SampleResult res)
+ protected static int getErrorLevel(
+ HttpURLConnection conn,
+ SampleResult res)
{
log.debug("Start : getErrorLevel");
- int errorLevel = 2;
+ int errorLevel= 2;
try
{
- int responseCode =
- ((HttpURLConnection) conn).getResponseCode();
- String responseMessage =
- ((HttpURLConnection) conn).getResponseMessage();
- errorLevel = responseCode/100;
+ int responseCode= ((HttpURLConnection)conn).getResponseCode();
+ String responseMessage=
+ ((HttpURLConnection)conn).getResponseMessage();
+ errorLevel= responseCode / 100;
res.setResponseCode(String.valueOf(responseCode));
res.setResponseMessage(responseMessage);
- if(log.isDebugEnabled())
+ if (log.isDebugEnabled())
{
- log.debug("getErrorLevel : responseCode - " +
- responseCode);
- log.debug("getErrorLevel : responseMessage - " +
- responseMessage);
+ log.debug("getErrorLevel : responseCode - " + responseCode);
+ log.debug(
+ "getErrorLevel : responseMessage - " + responseMessage);
}
}
catch (Exception e2)
{
log.error("getErrorLevel : " + conn.getHeaderField(0));
log.error("getErrorLevel : " + conn.getHeaderFieldKey(0));
- log.error("getErrorLevel : " +
- "Error getting response code for HttpUrlConnection - ",e2);
+ log.error(
+ "getErrorLevel : "
+ + "Error getting response code for HttpUrlConnection - ",
+ e2);
res.setResponseData(e2.toString().getBytes());
res.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
res.setResponseMessage(HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
@@ -328,21 +384,21 @@
log.debug("Start : getUTFEncodingName");
if (utfEncodingName == null)
{
- String versionNum = System.getProperty( "java.version" );
- if(log.isDebugEnabled())
+ String versionNum= System.getProperty("java.version");
+ if (log.isDebugEnabled())
{
log.debug("getUTFEncodingName : version = " + versionNum);
}
- if (versionNum.startsWith( "1.1" ))
+ if (versionNum.startsWith("1.1"))
{
- utfEncodingName = "UTF8";
+ utfEncodingName= "UTF8";
}
else
{
- utfEncodingName = "UTF-8";
+ utfEncodingName= "UTF-8";
}
}
- if(log.isDebugEnabled())
+ if (log.isDebugEnabled())
{
log.debug("getUTFEncodingName : Encoding = " + utfEncodingName);
}
@@ -354,7 +410,7 @@
{
private HTTPSampler hsf;
- transient private static Logger log = LoggingManager.getLoggerForClass();
+ transient private static Logger log= LoggingManager.getLoggerForClass();
public Test(String name)
{
@@ -364,7 +420,7 @@
protected void setUp()
{
log.debug("Start : setUp1");
- hsf = new HTTPSampler();
+ hsf= new HTTPSampler();
hsf.setMethod(HTTPSampler.GET);
hsf.setProtocol("file");
hsf.setPath("HTTPSamplerFullTestFile.txt");
@@ -375,12 +431,12 @@
public void testGetUTFEncodingName()
{
log.debug("Start : testGetUTFEncodingName");
- String javaVersion = System.getProperty("java.version");
+ String javaVersion= System.getProperty("java.version");
System.setProperty("java.version", "1.1");
assertEquals("UTF8", HTTPSamplerFull.getUTFEncodingName());
// need to clear utfEncodingName variable first 'cos
// getUTFEncodingName checks to see if it's null
- utfEncodingName = null;
+ utfEncodingName= null;
System.setProperty("java.version", "1.2");
assertEquals("UTF-8", HTTPSamplerFull.getUTFEncodingName());
System.setProperty("java.version", javaVersion);
@@ -404,47 +460,53 @@
log.debug("Start : testSampleMain");
// !ToDo : Have to wait till the day SampleResult is extended to
// store results of all downloaded stuff e.g. images, applets etc
- String fileInput = "<html>\n\n" +
- "<title>\n" +
- " A simple applet\n" +
- "</title>\n" +
- "<body background=\"back.jpg\" vlink=\"#dd0000\" "+
- "link=\"#0000ff\">\n" +
- "<center>\n" +
- "<h2> A simple applet\n" +
- "</h2>\n" +
- "<br>\n" +
- "<br>\n" +
- "<table>\n" +
- "<td width = 20>\n" +
- "<td width = 500 align = left>\n" +
- "<img src=\"/tomcat.gif\">\n" +
- "<img src=\"/tomcat.gif\">\n" +
- "<a href=\"NervousText.java\"> Read my code <a>\n" +
- "<p><applet code=NervousText.class width=400 " +
"height=200>\n" +
- "</applet>\n" +
- "<p><applet code=NervousText.class width=400 " +
"height=200>\n" +
- "</applet>\n" +
- "</table>\n" +
- "<form>\n" +
- " <input type=\"image\" src=\"/tomcat-power.gif\">\n" +
- "</form>\n" +
- "<form>\n" +
- " <input type=\"image\" src=\"/tomcat-power.gif\">\n" +
- "</form>\n" +
- "</body>\n" +
- "</html>\n";
- byte[] bytes = fileInput.getBytes();
+ String fileInput=
+ "<html>\n\n"
+ + "<title>\n"
+ + " A simple applet\n"
+ + "</title>\n"
+ + "<body background=\"back.jpg\" vlink=\"#dd0000\" "
+ + "link=\"#0000ff\">\n"
+ + "<center>\n"
+ + "<h2> A simple applet\n"
+ + "</h2>\n"
+ + "<br>\n"
+ + "<br>\n"
+ + "<table>\n"
+ + "<td width = 20>\n"
+ + "<td width = 500 align = left>\n"
+ + "<img src=\"/tomcat.gif\">\n"
+ + "<img src=\"/tomcat.gif\">\n"
+ + "<a href=\"NervousText.java\"> Read my code <a>\n"
+ + "<p><applet code=NervousText.class width=400 "
+ + "height=200>\n"
+ + "</applet>\n"
+ + "<p><applet code=NervousText.class width=400 "
+ + "height=200>\n"
+ + "</applet>\n"
+ + "</table>\n"
+ + "<form>\n"
+ + " <input type=\"image\" src=\"/tomcat-power.gif\">\n"
+ + "</form>\n"
+ + "<form>\n"
+ + " <input type=\"image\" src=\"/tomcat-power.gif\">\n"
+ + "</form>\n"
+ + "</body>\n"
+ + "</html>\n";
+ byte[] bytes= fileInput.getBytes();
try
{
- FileOutputStream fos =
- new FileOutputStream("HTTPSamplerFullTestFile.txt");
+ FileOutputStream fos=
+ new FileOutputStream("HTTPSamplerFullTestFile.txt");
fos.write(bytes);
fos.close();
}
- catch(IOException ioe)
+ catch (IOException ioe)
{
- fail("Cannot create HTTPSamplerFullTestFile.txt in current " +
"directory for testing - " + ioe);
+ fail(
+ "Cannot create HTTPSamplerFullTestFile.txt in current "
+ + "directory for testing - "
+ + ioe);
}
// !ToDo
// hsf.sample(entry);
@@ -455,7 +517,7 @@
protected void tearDown()
{
log.debug("Start : tearDown");
- hsf = null;
+ hsf= null;
log.debug("End : tearDown");
}
}
1.61 +2 -2 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSampler.java
Index: HTTPSampler.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSampler.java,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- HTTPSampler.java 22 Nov 2003 03:26:36 -0000 1.60
+++ HTTPSampler.java 25 Nov 2003 15:32:38 -0000 1.61
@@ -1150,7 +1150,7 @@
{
imageSampler = new HTTPSamplerFull();
}
- res = imageSampler.parseForImages(res, this);
+ res = imageSampler.downloadEmbeddedResources(res, this);
}
return res;
}
1.1 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java
Index: JTidyHTMLParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* @author TBA
* @author <a href="mailto:jsalvata@apache.org">Jordi Salvat i Alabart</a>
* @version $Id: JTidyHTMLParser.java,v 1.1 2003/11/25 15:32:38 jsalvata Exp $
*/
package org.apache.jmeter.protocol.http.parser;
import java.io.ByteArrayInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import junit.framework.TestCase;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xml.sax.SAXException;
/**
 * HtmlParser implementation using JTidy.
 * <p>
 * The HTML is tidied into a W3C DOM tree, which is then searched for the
 * elements whose attributes name embedded resources: images, applets,
 * image-type inputs, and body/table backgrounds.
 */
class JTidyHTMLParser extends HTMLParser
{
    /** Used to store the Logger (used for debug and error messages). */
    transient private static Logger log = LoggingManager.getLoggerForClass();

    /**
     * Creates the parser.
     * <p>
     * The constructor must not be private: HTMLParser creates its singleton
     * through <code>Class.forName(...).newInstance()</code>, and that call
     * fails with an IllegalAccessException when the no-argument constructor
     * is not accessible from HTMLParser.
     */
    protected JTidyHTMLParser()
    {
        super();
    }

    /* (non-Javadoc)
     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL)
     */
    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
        throws HTMLParseException
    {
        // A LinkedHashSet drops duplicates while keeping document order.
        LinkedHashSet uniqueURLs = new LinkedHashSet();
        Document dom = null;
        try
        {
            dom = (Document)getDOM(html);
        }
        catch (SAXException se)
        {
            throw new HTMLParseException(se);
        }

        // Now parse the DOM tree

        // TODO - check for <base> tag ??

        // look for images
        parseNodes(dom, "img", false, "src", uniqueURLs, baseUrl);

        // look for applets
        // This will only work with an Applet .class file.
        // Ideally, this should be upgraded to work with Objects (IE)
        // and archives (.jar and .zip) files as well.
        parseNodes(dom, "applet", false, "code", uniqueURLs, baseUrl);

        // look for input tags with image types
        parseNodes(dom, "input", true, "src", uniqueURLs, baseUrl);

        // look for background images
        parseNodes(dom, "body", false, "background", uniqueURLs, baseUrl);

        // look for table background images
        parseNodes(dom, "table", false, "background", uniqueURLs, baseUrl);

        //TODO look for TD, TR etc images

        return uniqueURLs.iterator();
    }

    /**
     * Searches the DOM tree for the specified HTML elements and collects
     * the resource references found in the given attribute.
     * <p>
     * Each reference is resolved against <code>baseUrl</code> and added to
     * <code>uniques</code> as a <code>URL</code>. A reference that cannot
     * be turned into a URL is added as the raw attribute <code>String</code>
     * instead, so that the caller can report it.
     *
     * @param html     the HTML document to search
     * @param htmlTag  the name of the HTML element to look for
     * @param type     if true, only elements carrying 'type=image' are
     *                 considered
     * @param srcTag   the name of the attribute holding the resource
     *                 reference
     * @param uniques  set receiving the references; being a set, it
     *                 ensures each resource is only listed once
     * @param baseUrl  base URL used to resolve relative references
     */
    private static void parseNodes(Document html, String htmlTag, boolean type,
                                   String srcTag, Set uniques, URL baseUrl)
    {
        log.debug("Start : HTTPSamplerFull parseNodes");

        NodeList nodeList = html.getElementsByTagName(htmlTag);
        for (int i = 0; i < nodeList.getLength(); i++)
        {
            Node tempNode = nodeList.item(i);
            if (log.isDebugEnabled())
            {
                log.debug("'" + htmlTag + "' tag: " + tempNode);
            }

            // get the url of the Binary
            NamedNodeMap nnm = tempNode.getAttributes();
            Node namedItem = null;

            if (type)
            {
                // if type is set, we need 'type=image'
                namedItem = nnm.getNamedItem("type");
                if (namedItem == null)
                {
                    log.debug("namedItem 'null' - ignoring");
                    // Skip this element only: later elements of the same
                    // kind may still qualify, so the scan must go on.
                    continue;
                }

                String inputType = namedItem.getNodeValue();
                if (log.isDebugEnabled())
                {
                    log.debug("Input type - " + inputType);
                }
                if (inputType == null || !inputType.equalsIgnoreCase("image"))
                {
                    log.debug("type != 'image' - ignoring");
                    // Same as above: ignore this element, keep scanning.
                    continue;
                }
                // otherwise this is an image input and its source is needed
            }

            namedItem = nnm.getNamedItem(srcTag);
            if (namedItem == null)
            {
                // The element does not carry the attribute of interest.
                continue;
            }

            String binUrlStr = namedItem.getNodeValue();
            try
            {
                uniques.add(new URL(baseUrl, binUrlStr));
            }
            catch (MalformedURLException mfue)
            {
                // Can't build the URL. May be a site error: return
                // the string.
                uniques.add(binUrlStr);
            }
        }
        log.debug("End : HTTPSamplerFull parseNodes");
    }

    /**
     * Returns <code>tidy</code> as HTML parser.
     * <p>
     * A new Tidy instance is created on every call, configured for UTF-8
     * input with warnings and other output turned off.
     *
     * @return a <code>tidy</code> HTML parser
     */
    private static Tidy getTidyParser()
    {
        log.debug("Start : getParser");
        Tidy tidy = new Tidy();
        tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        if (log.isDebugEnabled())
        {
            log.debug("getParser : tidy parser created - " + tidy);
        }
        log.debug("End : getParser");
        return tidy;
    }

    /**
     * Returns a node representing a whole xml given an xml document.
     *
     * @param text an xml document (as a byte array)
     * @return a node representing a whole xml
     *
     * @throws SAXException indicates an error parsing the xml document
     */
    private static Node getDOM(byte[] text) throws SAXException
    {
        log.debug("Start : getDOM");
        // The second argument is the stream for tidied output; none wanted.
        Node node =
            getTidyParser().parseDOM(new ByteArrayInputStream(text), null);
        if (log.isDebugEnabled())
        {
            log.debug("node : " + node);
        }
        log.debug("End : getDOM");
        return node;
    }

    /**
     * Runs the shared HTMLParser test suite against this implementation.
     */
    public static class Test extends TestCase
    {
        public Test()
        {
            super();
        }

        public void testParser() throws Exception
        {
            HTMLParserTest.testParser(new JTidyHTMLParser());
        }
    }
}
1.1 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java
Index: HtmlParserHTMLParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* @author TBA
* @author <a href="mailto:jsalvata@apache.org">Jordi Salvat i Alabart</a>
* @version $Id: HtmlParserHTMLParser.java,v 1.1 2003/11/25 15:32:38 jsalvata Exp $
*/
package org.apache.jmeter.protocol.http.parser;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import junit.framework.TestCase;
import org.apache.jmeter.samplers.SampleResult;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
import org.htmlparser.*;
import org.htmlparser.util.*;
import org.htmlparser.scanners.*;
import org.htmlparser.tags.*;
/**
 * HtmlParser implementation using SourceForge's HtmlParser.
 * <p>
 * The document is scanned node by node; body backgrounds, images, applets
 * and image-type inputs contribute their resource reference to the result.
 */
class HtmlParserHTMLParser extends HTMLParser
{
    /** Used to store the Logger (used for debug and error messages). */
    transient private static Logger log= LoggingManager.getLoggerForClass();

    /* (non-Javadoc)
     * @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
     */
    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
        throws HTMLParseException
    {
        Parser htmlParser= null;
        try
        {
            // NOTE(review): the bytes are decoded with the platform default
            // charset; confirm whether the response encoding should be used.
            String contents= new String(html);
            StringReader reader= new StringReader(contents);
            NodeReader nreader= new NodeReader(reader, contents.length());
            htmlParser= new Parser(nreader, new DefaultParserFeedback());
            addTagListeners(htmlParser);
        }
        catch (Exception e)
        {
            throw new HTMLParseException(e);
        }

        // Now parse the DOM tree

        // This is used to ignore duplicated binary files.
        // (A LinkedHashSet also keeps the references in document order.)
        Set uniqueURLs= new LinkedHashSet();

        try
        {
            // we start to iterate through the elements
            for (NodeIterator e= htmlParser.elements(); e.hasMoreNodes();)
            {
                Node node= e.nextNode();
                String binUrlStr= null;

                // first we check to see if body tag has a
                // background set and we set the NodeIterator
                // to the child elements inside the body
                if (node instanceof BodyTag)
                {
                    BodyTag body= (BodyTag)node;
                    binUrlStr= body.getAttribute("background");
                    // if the body tag exists, we get the elements
                    // within the body tag. if we don't we won't
                    // see the body of the page. The only catch
                    // with this is if there are images after the
                    // closing body tag, it won't get parsed. If
                    // someone puts it outside the body tag, it
                    // is probably a mistake. Plus it's bad to
                    // have important content after the closing
                    // body tag. Peter Lin 10-9-03
                    e= body.elements();
                }
                else if (node instanceof ImageTag)
                {
                    ImageTag image= (ImageTag)node;
                    binUrlStr= image.getImageURL();
                }
                else if (node instanceof AppletTag)
                {
                    // look for applets
                    // This will only work with an Applet .class file.
                    // Ideally, this should be upgraded to work with Objects
                    // (IE) and archives (.jar and .zip) files as well.
                    AppletTag applet= (AppletTag)node;
                    binUrlStr= applet.getAppletClass();
                }
                else if (node instanceof InputTag)
                {
                    InputTag input= (InputTag)node;
                    // we check the input tag type for image
                    String strType= input.getAttribute("type");
                    if (strType != null && strType.equalsIgnoreCase("image"))
                    {
                        // then we need to download the binary
                        binUrlStr= input.getAttribute("src");
                    }
                }

                if (binUrlStr == null)
                {
                    // Not a node of interest, or no reference on it.
                    continue;
                }

                try
                {
                    uniqueURLs.add(new URL(baseUrl, binUrlStr));
                }
                catch (MalformedURLException mfue)
                {
                    // Can't build the URL? May be a site error: return the
                    // string.
                    uniqueURLs.add(binUrlStr);
                }
            }
            log.debug("End : NewHTTPSamplerFull parseNodes");
        }
        catch (ParserException e)
        {
            // An overall parsing failure must reach the caller (see the
            // contract of HTMLParser#getEmbeddedResourceURLs) rather than
            // be hidden behind a silently truncated result.
            throw new HTMLParseException(e);
        }

        return uniqueURLs.iterator();
    }

    /**
     * Registers on the given parser the tag scanners needed to recognise
     * the nodes this class looks at: body, image, input and applet tags.
     *
     * @param parser the parser on which to register the scanners
     */
    private static void addTagListeners(Parser parser)
    {
        log.debug("Start : addTagListeners");
        // add body tag scanner
        parser.addScanner(new BodyScanner());
        // add ImageTag scanner
        // (the link scanner itself is not registered; it is only used to
        // create the image scanner)
        LinkScanner linkScanner= new LinkScanner(LinkTag.LINK_TAG_FILTER);
        // parser.addScanner(linkScanner);
        parser.addScanner(
            linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
        // add input tag scanner
        parser.addScanner(new InputTagScanner());
        // add applet tag scanner
        parser.addScanner(new AppletScanner());
    }

    /**
     * Runs the shared HTMLParser test suite against this implementation.
     */
    public static class Test extends TestCase
    {
        public Test()
        {
            super();
        }

        public void testParser() throws Exception
        {
            HTMLParserTest.testParser(new HtmlParserHTMLParser());
        }
    }
}
1.1 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java
Index: HTMLParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* @author <a href="mailto:jsalvata@apache.org">Jordi Salvat i Alabart</a>
* @version $Id: HTMLParser.java,v 1.1 2003/11/25 15:32:38 jsalvata Exp $
*/
package org.apache.jmeter.protocol.http.parser;
import java.io.File;
import java.io.FileInputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.jmeter.util.JMeterUtils;
/**
* HtmlParsers can parse HTML content to obtain URLs.
*/
public abstract class HTMLParser
{
/** Shared parser instance; remains <code>null</code> until {@link #initialize()} has run. */
static HTMLParser parser;

/**
 * Instantiates the parser implementation and stores it in {@link #parser}.
 * <p>
 * The implementation class name is obtained from
 * <code>JMeterUtils.getPropDefault</code> using the property key
 * <code>htmlParser.className</code> and the default value
 * <code>org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser</code>.
 * The class is then loaded by name and created through its no-argument
 * constructor.
 *
 * @throws Error wrapping the reflection failure when the class cannot be
 *         found, accessed or instantiated
 */
private static void initialize() {
    final String implementationName =
        JMeterUtils.getPropDefault(
            "htmlParser.className",
            "org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser");
    try {
        Class implementation = Class.forName(implementationName);
        parser = (HTMLParser) implementation.newInstance();
    } catch (ClassNotFoundException e) {
        throw new Error(e);
    } catch (InstantiationException e) {
        throw new Error(e);
    } catch (IllegalAccessException e) {
        throw new Error(e);
    }
}
/**
 * Obtain the (singleton) HtmlParser, creating it on first use.
 * <p>
 * The method is synchronized: without the lock, two threads calling it
 * for the first time could both see a <code>null</code> instance and both
 * run the initialization.
 *
 * @return The single HtmlParser instance.
 */
public static synchronized HTMLParser getParser() {
    if (parser == null) {
        initialize();
    }
    return parser;
}
/**
 * Get the URLs for all the resources that a browser would automatically
 * download following the download of the HTML content, that is: images,
 * stylesheets, javascript files, applets, etc...
 * <p>
 * URLs should not appear twice in the returned iterator.
 * <p>
 * Malformed URLs can be reported to the caller by having the Iterator
 * return the corresponding locator as a plain String instead of a URL
 * object. Overall problems parsing the html should be reported by
 * throwing an HTMLParseException.
 *
 * @param html HTML code
 * @param baseUrl Base URL from which the HTML code was obtained
 * @return an Iterator for the resource URLs
 * @throws HTMLParseException on overall problems parsing the HTML
 */
public abstract Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl)
throws HTMLParseException;
/**
 * JUnit tests for HTML parsers. {@link #testParser(HTMLParser)} is a
 * reusable check that can be run against any implementation;
 * {@link #testDefaultParser()} applies it to the configured singleton.
 */
public static class HTMLParserTest extends TestCase
{
    public HTMLParserTest() {
        super();
    }

    /**
     * Parses <code>testfiles/HTMLParserTestCase.html</code> with the given
     * parser and checks that exactly the expected URLs are returned, in
     * the expected order.
     *
     * @param parser the parser implementation under test
     * @throws Exception if the test file cannot be read or parsing fails
     */
    public static void testParser(HTMLParser parser) throws Exception
    {
        final String[] EXPECTED_RESULT= new String[] {
            "http://myhost/mydir/images/image-a.gif",
            "http://myhost/mydir/images/image-b.gif",
            "http://myhost/mydir/images/image-c.gif",
            "http://myhost/mydir/images/image-d.gif",
            "http://myhost/mydir/images/image-e.gif",
            "http://myhost/mydir/images/image-f.gif",
            "http://myhost/mydir/images/image-a2.gif",
            "http://myhost/mydir/images/image-b2.gif",
            "http://myhost/mydir/images/image-c2.gif",
            "http://myhost/mydir/images/image-d2.gif",
            "http://myhost/mydir/images/image-e2.gif",
            "http://myhost/mydir/images/image-f2.gif",
        };
        File f= new File("testfiles/HTMLParserTestCase.html");
        byte[] buffer= new byte[(int)f.length()];

        // A single read() call is allowed to return fewer bytes than
        // requested, so keep reading until the buffer is full or the
        // stream ends, and always release the file handle.
        int len= 0;
        FileInputStream in= new FileInputStream(f);
        try
        {
            while (len < buffer.length)
            {
                int count= in.read(buffer, len, buffer.length - len);
                if (count < 0)
                {
                    break;
                }
                len+= count;
            }
        }
        finally
        {
            in.close();
        }
        assertEquals(buffer.length, len);

        Iterator result=
            parser.getEmbeddedResourceURLs(
                buffer,
                new URL("http://myhost/mydir/myfile.html"));
        Iterator expected= Arrays.asList(EXPECTED_RESULT).iterator();
        while (expected.hasNext()) {
            assertTrue(result.hasNext());
            assertEquals(expected.next(), result.next().toString());
        }
        assertFalse(result.hasNext());
    }

    /**
     * Runs the shared check against the parser returned by
     * {@link HTMLParser#getParser()}.
     */
    public void testDefaultParser() throws Exception {
        testParser(getParser());
    }
}
}
1.1 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseException.java
Index: HTMLParseException.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* @author <a href="mailto:jsalvata@apache.org">Jordi Salvat i Alabart</a>
* @version $Id: HTMLParseException.java,v 1.1 2003/11/25 15:32:38 jsalvata Exp $
*/
package org.apache.jmeter.protocol.http.parser;
/**
 * Checked exception used to report an overall problem while parsing HTML
 * content.
 * <p>
 * The class adds no state of its own: each constructor forwards its
 * arguments to the matching {@link Exception} constructor.
 */
public class HTMLParseException extends Exception {

    /**
     * Creates an exception with neither a detail message nor a cause.
     */
    public HTMLParseException() {
        super();
    }

    /**
     * Creates an exception with a detail message and no cause.
     *
     * @param message description of the parsing problem
     */
    public HTMLParseException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param cause the failure that made parsing impossible
     */
    public HTMLParseException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with both a detail message and an underlying
     * failure.
     *
     * @param message description of the parsing problem
     * @param cause the failure that made parsing impossible
     */
    public HTMLParseException(String message, Throwable cause) {
        super(message, cause);
    }
}
1.1 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java
Index: RegexpHTMLParser.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* @author <a href="mailto:jsalvata@apache.org">Jordi Salvat i Alabart</a>
* @version $Id: RegexpHTMLParser.java,v 1.1 2003/11/25 15:32:38 jsalvata Exp $
*/
package org.apache.jmeter.protocol.http.parser;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Set;
import java.util.LinkedHashSet;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
// NOTE: Also looked at using Java 1.4 regexp instead of ORO. The change was
// trivial. Performance did not improve -- at least not significantly.
// Finally decided for ORO following advice from Stefan Bodewig (message
// to jmeter-dev dated 25 Nov 2003 8:52 CET) [Jordi]
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternMatcherInput;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.apache.oro.text.regex.MalformedPatternException;
/**
* HtmlParser implementation using regular expressions.
* <p>
* This class will find RLs (resource locators) specified in the following
* ways (where <b>url</b> represents the RL being found):
* <ul>
* <li>&lt;img src=<b>url</b> ... &gt;
* <li>&lt;script src=<b>url</b> ... &gt;
* <li>&lt;applet code=<b>url</b> ... &gt;
* <li>&lt;input type=image src=<b>url</b> ... &gt;
* <li>&lt;body background=<b>url</b> ... &gt;
* <li>&lt;table background=<b>url</b> ... &gt;
* <li>&lt;td background=<b>url</b> ... &gt;
* <li>&lt;tr background=<b>url</b> ... &gt;
* <li>&lt;applet ... codebase=<b>url</b> ... &gt;
* <li>&lt;embed src=<b>url</b> ... &gt;
* <li>&lt;embed codebase=<b>url</b> ... &gt;
* <li>&lt;object codebase=<b>url</b> ... &gt;
* <li>&lt;link rel=stylesheet href=<b>url</b> ... &gt;
* </ul>
*
* <p>
* This class will take into account the following construct:
* <ul>
* <li>&lt;base href=<b>url</b>&gt;
* </ul>
*
* <p>
* But not the following:
* <ul>
* <li>&lt; ... codebase=<b>url</b> ... &gt;
* </ul>
*
* <p>
* This HtmlParser implementation only recognizes attribute values that
* are enclosed in double quotes.
*/
class RegexpHTMLParser extends HTMLParser
{
/**
* Regular expression used against the HTML code to find the URIs of
* images, etc.
* <p>
* It is an alternation of seven tag patterns, each contributing exactly
* one capturing group, so the group number identifies what was matched:
* <ol>
* <li>BASE: the HREF value
* <li>IMG or SCRIPT: the SRC value
* <li>APPLET: the CODE or CODEBASE value
* <li>EMBED or OBJECT: the SRC or CODEBASE value
* <li>BODY, TABLE, TR or TD: the BACKGROUND value
* <li>INPUT: the SRC value (alongside a TYPE="image" attribute)
* <li>LINK: the HREF value (alongside a REL="stylesheet" attribute)
* </ol>
* Every alternative requires the attribute value to be enclosed in
* double quotes.
*/
private static final String REGEXP=
"<BASE(?=\\s)[^\\>]*\\sHREF\\s*=\\s*\"([^\">]*)\""
+ "|<(?:IMG|SCRIPT)(?=\\s)[^\\>]*\\sSRC\\s*=\\s*\"([^\">]*)\""
+ "|<APPLET(?=\\s)[^\\>]*\\sCODE(?:BASE)?\\s*=\\s*\"([^\">]*)\""
+ "|<(?:EMBED|OBJECT)(?=\\s)[^\\>]*\\s(?:SRC|CODEBASE)\\s*=\\s*\"([^\">]*)\""
+ "|<(?:BODY|TABLE|TR|TD)(?=\\s)[^\\>]*\\sBACKGROUND\\s*=\\s*\"([^\">]*)\""
+ "|<INPUT(?=\\s)(?:[^\\>]*\\s(?:SRC\\s*=\\s*\"([^\">]*)\"|TYPE\\s*=\\s*\"image\")){2,}"
+ "|<LINK(?=\\s)(?:[^\\>]*\\s(?:HREF\\s*=\\s*\"([^\">]*)\"|REL\\s*=\\s*\"stylesheet\")){2,}";
/**
* Compiled form of {@link #REGEXP}; assigned once by the static
* initializer of this class.
*/
static Pattern pattern;
/**
* Thread-local matcher: each thread lazily receives its own
* Perl5Matcher instance.
*/
private static ThreadLocal localMatcher= new ThreadLocal()
{
protected Object initialValue()
{
return new Perl5Matcher();
}
};
/**
* Thread-local input: each thread lazily receives its own
* PatternMatcherInput, created over an empty character array and
* re-pointed at the HTML text on every parse.
*/
private static ThreadLocal localInput= new ThreadLocal()
{
protected Object initialValue()
{
return new PatternMatcherInput(new char[0]);
}
};
/**
* Used to store the Logger (used for debug and error messages).
* Declared ahead of the static initializer, which logs through it when
* pattern compilation fails.
*/
transient private static Logger log= LoggingManager.getLoggerForClass();
/*
 * Compile the regular expression once, when the class is loaded. The
 * pattern is compiled case-insensitive, in single-line mode, and
 * read-only.
 */
static {
    try {
        Perl5Compiler c= new Perl5Compiler();
        pattern=
            c.compile(
                REGEXP,
                Perl5Compiler.CASE_INSENSITIVE_MASK
                    | Perl5Compiler.SINGLELINE_MASK
                    | Perl5Compiler.READ_ONLY_MASK);
    } catch (MalformedPatternException mpe) {
        // REGEXP is a compile-time constant, so this can only be a
        // programming error. Log it with the full stack trace and abort
        // class initialization.
        log.error(
            "Internal error compiling regular expression in RegexpHTMLParser.",
            mpe);
        throw new Error(mpe);
    }
}
/**
 * Scans the HTML text with the class-wide pattern and collects the
 * locations of the embedded resources it finds.
 * <p>
 * A match on group 1 (BASE HREF) replaces the base URL used for all
 * later matches; every other non-null group is resolved against the
 * current base URL and added to the result. A location that cannot be
 * turned into a URL is added as a plain String instead. Duplicates are
 * dropped and first-seen order is kept.
 *
 * @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL)
 */
public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl) {
    Perl5Matcher regexMatcher = (Perl5Matcher) localMatcher.get();
    PatternMatcherInput page = (PatternMatcherInput) localInput.get();

    // TODO: find a way to avoid the cost of creating a String here --
    // probably a new PatternMatcherInput working on a byte[] would do
    // better.
    page.setInput(new String(html));

    // Insertion-ordered set: ignores duplicates while keeping resources
    // roughly in page order, which models browser behaviour better.
    Set found = new LinkedHashSet();

    while (regexMatcher.contains(page, pattern)) {
        MatchResult hit = regexMatcher.getMatch();
        if (log.isDebugEnabled()) {
            log.debug("match groups " + hit.groups());
        }

        // Group 1 is only set for a BASE HREF match.
        String baseHref = hit.group(1);
        if (baseHref != null) {
            if (log.isDebugEnabled()) {
                log.debug("new baseUrl: " + baseHref + " - " + baseUrl.toString());
            }
            try {
                baseUrl = new URL(baseUrl, baseHref);
            } catch (MalformedURLException e) {
                // Doesn't even look like a URL? Maybe it isn't: keep
                // the previous base and carry on.
                if (log.isDebugEnabled()) {
                    log.debug(
                        "Can't build base URL from RL "
                            + baseHref
                            + " in page "
                            + baseUrl,
                        e);
                }
            }
        }

        // The remaining groups hold embedded resource locations.
        for (int group = 2; group < hit.groups(); group++) {
            String location = hit.group(group);
            if (log.isDebugEnabled()) {
                log.debug("group " + group + " - " + hit.group(group));
            }
            if (location == null) {
                continue;
            }
            try {
                found.add(new URL(baseUrl, location));
            } catch (MalformedURLException e) {
                // Doesn't even look like a URL? It may be a site error:
                // hand the raw string back to the caller.
                if (log.isDebugEnabled()) {
                    log.debug(
                        "Can't build URL from RL "
                            + location
                            + " in page "
                            + baseUrl);
                }
                found.add(location);
            }
        }
    }
    return found.iterator();
}
/**
 * JUnit test that runs the shared parser check against a
 * RegexpHTMLParser instance.
 */
public static class Test extends TestCase
{
    public Test()
    {
        super();
    }

    /**
     * Delegates to <code>HTMLParserTest.testParser</code> with a freshly
     * created RegexpHTMLParser.
     */
    public void testParser() throws Exception
    {
        HTMLParser regexpParser = new RegexpHTMLParser();
        HTMLParserTest.testParser(regexpParser);
    }
}
}
1.1 jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java
Index: HtmlParsingUtils.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jmeter.protocol.http.parser;
import java.io.ByteArrayInputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.LinkedList;
import java.util.List;
import junit.framework.TestCase;
import org.apache.jmeter.config.Argument;
import org.apache.jmeter.protocol.http.sampler.HTTPSampler;
import org.apache.jmeter.testelement.property.PropertyIterator;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
import org.apache.oro.text.PatternCacheLRU;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xml.sax.SAXException;
/**
* @author Michael Stover
* Created June 14, 2001
* @version $Revision: 1.1 $ Last updated: $Date: 2003/11/25 15:32:38 $
*/
public final class HtmlParsingUtils implements Serializable
{
/** Logger for this class (used for debug, warning and error messages). */
transient private static Logger log = LoggingManager.getLoggerForClass();
/**
 * Cached result of {@link #getUTFEncodingName()}; <code>null</code> until
 * that method has computed it. The nested test class resets it to force
 * re-computation.
 */
protected static String utfEncodingName;
/* NOTUSED
private int compilerOptions =
Perl5Compiler.CASE_INSENSITIVE_MASK
| Perl5Compiler.MULTILINE_MASK
| Perl5Compiler.READ_ONLY_MASK;
*/
/**
 * Cache of compiled patterns shared by the matching methods of this
 * class; created with a size argument of 1000 and a Perl5Compiler.
 */
private static PatternCacheLRU patternCache =
new PatternCacheLRU(1000, new Perl5Compiler());
/** Thread-local matcher: each thread lazily receives its own Perl5Matcher. */
private static ThreadLocal localMatcher = new ThreadLocal()
{
protected Object initialValue()
{
return new Perl5Matcher();
}
};
/**
* Private constructor to prevent instantiation.
*/
private HtmlParsingUtils()
{
}
/**
 * Decides whether a link satisfies the constraints held by a
 * configuration sampler.
 * <p>
 * The checks are applied in this order, and the first failure returns
 * <code>false</code>:
 * <ol>
 * <li>every argument name of <code>config</code> must either occur
 *     literally (followed by <code>=</code>) in the decoded query string
 *     of the link, or be found in it when used as a regular expression;
 * <li>when <code>config</code> has a non-empty domain, the link's domain
 *     must equal it or match it as a regular expression;
 * <li>the link's path must equal the configured path or match the
 *     configured path, prefixed with <code>[/]*</code>, as a regular
 *     expression;
 * <li>the link's protocol must match the configured protocol as a
 *     regular expression.
 * </ol>
 *
 * @param newLink the link being examined
 * @param config the sampler holding the expected values / patterns
 * @return <code>true</code> if all checks pass
 * @throws MalformedPatternException declared for the pattern lookups
 */
public static synchronized boolean isAnchorMatched(
        HTTPSampler newLink,
        HTTPSampler config)
        throws MalformedPatternException {
    Perl5Matcher regex = (Perl5Matcher) localMatcher.get();
    PropertyIterator configArgs = config.getArguments().iterator();

    // In JDK1.2, URLDecoder.decode has Exception in its throws clause.
    // However, it was removed in JDK1.3. Since JMeter is
    // JDK1.2-compatible, we need to catch Exception.
    String decodedQuery = null;
    try {
        decodedQuery = URLDecoder.decode(newLink.getQueryString());// TODO use decode(String,"UTF-8") instead?
    } catch (Exception e) {
        // Deliberately ignored: decodedQuery simply stays null.
    }

    // Without a query string no configured argument can be satisfied.
    if (decodedQuery == null && config.getArguments().getArgumentCount() > 0) {
        return false;
    }

    while (configArgs.hasNext()) {
        Argument wanted = (Argument) configArgs.next().getObjectValue();
        boolean literallyPresent =
            decodedQuery.indexOf(wanted.getName() + "=") != -1;
        if (!literallyPresent
            && !regex.contains(
                decodedQuery,
                patternCache.getPattern(
                    wanted.getName(),
                    Perl5Compiler.READ_ONLY_MASK))) {
            return false;
        }
    }

    boolean domainConstrained =
        config.getDomain() != null && config.getDomain().length() > 0;
    if (domainConstrained
        && !newLink.getDomain().equals(config.getDomain())
        && !regex.matches(
            newLink.getDomain(),
            patternCache.getPattern(
                config.getDomain(),
                Perl5Compiler.READ_ONLY_MASK))) {
        return false;
    }

    boolean samePath = newLink.getPath().equals(config.getPath());
    if (!samePath
        && !regex.matches(
            newLink.getPath(),
            patternCache.getPattern(
                "[/]*" + config.getPath(),
                Perl5Compiler.READ_ONLY_MASK))) {
        return false;
    }

    return regex.matches(
        newLink.getProtocol(),
        patternCache.getPattern(
            config.getProtocol(),
            Perl5Compiler.READ_ONLY_MASK));
}
/**
 * Tests an argument against a pattern argument.
 * <p>
 * The name matches when it equals the pattern's name or matches it as a
 * regular expression; the value is compared the same way. The value is
 * only examined once the name has matched.
 *
 * @param arg the argument being examined
 * @param patternArg the argument holding the expected name and value,
 *        each usable as a literal or as a regular expression
 * @return <code>true</code> if both name and value match
 * @throws MalformedPatternException declared for the pattern lookups
 */
public static synchronized boolean isArgumentMatched(
        Argument arg,
        Argument patternArg)
        throws MalformedPatternException {
    Perl5Matcher regex = (Perl5Matcher) localMatcher.get();

    boolean nameMatches =
        arg.getName().equals(patternArg.getName())
            || regex.matches(
                arg.getName(),
                patternCache.getPattern(
                    patternArg.getName(),
                    Perl5Compiler.READ_ONLY_MASK));
    if (!nameMatches) {
        return false;
    }

    return arg.getValue().equals(patternArg.getValue())
        || regex.matches(
            (String) arg.getValue(),
            patternCache.getPattern(
                (String) patternArg.getValue(),
                Perl5Compiler.READ_ONLY_MASK));
}
/**
 * Creates and configures a new <code>Tidy</code> HTML parser.
 * <p>
 * The returned instance is set to UTF8 character encoding, quiet mode,
 * and with warnings switched off. A new instance is created on every
 * call.
 *
 * @return a freshly configured <code>Tidy</code> parser
 */
public static Tidy getParser() {
    log.debug("Start : getParser1");

    final Tidy htmlTidy = new Tidy();
    htmlTidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    htmlTidy.setQuiet(true);
    htmlTidy.setShowWarnings(false);

    if (log.isDebugEnabled()) {
        log.debug("getParser1 : tidy parser created - " + htmlTidy);
    }
    log.debug("End : getParser1");
    return htmlTidy;
}
/**
 * Parses the given markup text with a Tidy parser and returns the
 * resulting DOM node.
 * <p>
 * The text is converted to bytes using the encoding name returned by
 * {@link #getUTFEncodingName()} before being handed to the parser
 * obtained from {@link #getParser()}.
 *
 * @param text the document text to parse
 * @return the node produced by the parser for the whole document
 * @throws SAXException declared for callers; kept for interface
 *         compatibility
 * @throws RuntimeException wrapping the UnsupportedEncodingException if
 *         the UTF-8 encoding is not available
 */
public static Node getDOM(String text) throws SAXException {
    log.debug("Start : getDOM1");
    try {
        Node node =
            getParser().parseDOM(
                new ByteArrayInputStream(
                    text.getBytes(getUTFEncodingName())),
                null);
        if (log.isDebugEnabled()) {
            log.debug("node : " + node);
        }
        log.debug("End : getDOM1");
        return node;
    } catch (UnsupportedEncodingException e) {
        log.error("getDOM1 : Unsupported encoding exception - " + e);
        log.debug("End : getDOM1");
        // Keep the original exception as the cause so the failure can
        // be traced back to the unsupported encoding name.
        throw new RuntimeException("UTF-8 encoding failed", e);
    }
}
/**
 * Returns the name under which the running JDK knows the UTF-8
 * encoding.
 * <p>
 * The name is worked out on the first call from the
 * <code>java.version</code> system property and cached in
 * {@link #utfEncodingName}: versions starting with <code>1.1</code> get
 * <code>UTF8</code>, everything else gets <code>UTF-8</code>.
 *
 * @return either UTF8 or UTF-8 depending on the jdk version
 */
public static String getUTFEncodingName() {
    log.debug("Start : getUTFEncodingName1");

    if (utfEncodingName == null) {
        String javaVersion = System.getProperty("java.version");
        if (log.isDebugEnabled()) {
            log.debug("getUTFEncodingName1 : versionNum - " + javaVersion);
        }
        utfEncodingName = javaVersion.startsWith("1.1") ? "UTF8" : "UTF-8";
    }

    if (log.isDebugEnabled()) {
        log.debug(
            "getUTFEncodingName1 : Returning utfEncodingName - "
                + utfEncodingName);
    }
    log.debug("End : getUTFEncodingName1");
    return utfEncodingName;
}
/**
 * Creates an empty DOM document by delegating to
 * <code>Tidy.createEmptyDocument()</code>.
 *
 * @return the new, empty document
 */
public static Document createEmptyDoc() {
    Document emptyDocument = Tidy.createEmptyDocument();
    return emptyDocument;
}
/**
 * Builds a sampler for an HREF string found in a page, using the
 * sampler the page was fetched with as context.
 * <p>
 * Domain, protocol and port are first copied from the context. The
 * path is then derived from the form of the HREF:
 * <ul>
 * <li>starting with <code>/</code>: the HREF (without query) is the path;
 * <li>starting with <code>..</code>: the HREF, minus its first two
 *     characters and query, is appended to the parent of the context
 *     directory;
 * <li>not starting with <code>http</code> (ignoring case): the HREF
 *     (without query) is appended to the context directory;
 * <li>otherwise: the HREF is parsed as an absolute URL, which supplies
 *     path, domain, protocol and port.
 * </ul>
 * Whatever follows the first <code>?</code> of the HREF is passed to
 * <code>parseArguments</code> on the new sampler.
 * <p>
 * NOTE(review): in the <code>..</code> case a context path with fewer
 * than two slashes makes the inner <code>lastIndexOf</code> return -1,
 * so <code>substring</code> throws StringIndexOutOfBoundsException.
 *
 * @param parsedUrlString the HREF value as found in the page
 * @param context the sampler describing the page containing the HREF
 * @return a new sampler pointing at the HREF target
 * @throws MalformedURLException if an absolute HREF cannot be parsed
 */
public static HTTPSampler createUrlFromAnchor(
        String parsedUrlString,
        HTTPSampler context)
        throws MalformedURLException {
    HTTPSampler target = new HTTPSampler();
    target.setDomain(context.getDomain());
    target.setProtocol(context.getProtocol());
    target.setPort(context.getPort());

    // In JDK1.3, we can get the path using getPath(). However, in JDK1.2,
    // we have to parse the file to obtain the path: everything up to the
    // LAST question mark (if any).
    String contextPath = stripQuery(context.getPath());

    int queryStarts = parsedUrlString.indexOf("?");
    if (queryStarts == -1) {
        queryStarts = parsedUrlString.length();
    }

    if (parsedUrlString.startsWith("/")) {
        target.setPath(parsedUrlString.substring(0, queryStarts));
    } else if (parsedUrlString.startsWith("..")) {
        String contextDir =
            contextPath.substring(0, contextPath.lastIndexOf("/"));
        String parentDir =
            contextPath.substring(0, contextDir.lastIndexOf("/"));
        target.setPath(
            parentDir + parsedUrlString.substring(2, queryStarts));
    } else if (!parsedUrlString.toLowerCase().startsWith("http")) {
        target.setPath(
            contextPath.substring(0, contextPath.lastIndexOf("/"))
                + "/"
                + parsedUrlString.substring(0, queryStarts));
    } else {
        URL absolute = new URL(parsedUrlString);
        // Determine the path. (See JDK1.2/1.3 comment above.)
        target.setPath(stripQuery(absolute.getFile()));
        target.setDomain(absolute.getHost());
        target.setProtocol(absolute.getProtocol());
        target.setPort(absolute.getPort());
    }

    if (queryStarts < parsedUrlString.length()) {
        target.parseArguments(parsedUrlString.substring(queryStarts + 1));
    }
    return target;
}

/**
 * Returns the given file string cut off before its last question mark,
 * or unchanged when it contains none.
 */
private static String stripQuery(String file) {
    int lastQuestionMark = file.lastIndexOf('?');
    if (lastQuestionMark != -1) {
        return file.substring(0, lastQuestionMark);
    }
    return file;
}
/**
 * Walks the DOM below <code>doc</code> and returns the samplers that
 * <code>recurseForm</code> collected for the forms it found.
 *
 * @param doc the node at which the search starts
 * @param context the sampler describing the page the node belongs to
 * @return the list filled by <code>recurseForm</code>; empty when it
 *         added nothing
 */
public static List createURLFromForm(Node doc, HTTPSampler context) {
    LinkedList formSamplers = new LinkedList();
    // Start outside any form and with no enclosing <select> name.
    recurseForm(doc, formSamplers, context, null, false);
    return formSamplers;
}
/**
 * Recursively visits a DOM subtree, creating one sampler per form and
 * adding the form's fields to it as arguments.
 * <p>
 * Outside a form, a <code>form</code> element appends a new sampler
 * (built from its <code>action</code> attribute) to
 * <code>urlConfigs</code>. Inside a form, fields are added to the last
 * sampler in the list:
 * <ul>
 * <li><code>input</code>: its <code>name</code> and <code>value</code>
 *     attributes;
 * <li><code>textarea</code>: its <code>name</code> and the text of its
 *     first child (empty if there is none);
 * <li><code>select</code>: remembers its <code>name</code> for the
 *     options below it;
 * <li><code>option</code>: the enclosing select name with the
 *     <code>value</code> attribute, or, when that attribute is absent,
 *     the text of its first child.
 * </ul>
 * Any exception raised while handling a node is logged as a warning and
 * the traversal continues with the node's children.
 *
 * @param tempNode the node to process
 * @param urlConfigs receives the samplers created for forms
 * @param context the sampler describing the page being parsed
 * @param selectName name of the enclosing <code>select</code>, if any
 * @param inForm whether the traversal is currently inside a form
 * @return the in-form state after this node and its children
 */
private static boolean recurseForm(
        Node tempNode,
        LinkedList urlConfigs,
        HTTPSampler context,
        String selectName,
        boolean inForm) {
    NamedNodeMap nodeAtts = tempNode.getAttributes();
    String tag = tempNode.getNodeName();
    try {
        if (inForm) {
            HTTPSampler url = (HTTPSampler) urlConfigs.getLast();
            if (tag.equalsIgnoreCase("form")) {
                try {
                    urlConfigs.add(createFormUrlConfig(tempNode, context));
                } catch (MalformedURLException e) {
                    inForm = false;
                }
            } else if (tag.equalsIgnoreCase("input")) {
                url.addArgument(
                    getAttributeValue(nodeAtts, "name"),
                    getAttributeValue(nodeAtts, "value"));
            } else if (tag.equalsIgnoreCase("textarea")) {
                url.addArgument(
                    getAttributeValue(nodeAtts, "name"),
                    firstChildValue(tempNode));
            } else if (tag.equalsIgnoreCase("select")) {
                selectName = getAttributeValue(nodeAtts, "name");
            } else if (tag.equalsIgnoreCase("option")) {
                // getAttributeValue() reports a missing attribute as "",
                // so test for the attribute itself: only a genuinely
                // absent value falls back to the option's text.
                String value = null;
                if (nodeAtts != null
                    && nodeAtts.getNamedItem("value") != null) {
                    value = getAttributeValue(nodeAtts, "value");
                }
                if (value == null) {
                    value = firstChildValue(tempNode);
                }
                url.addArgument(selectName, value);
            }
        } else if (tag.equalsIgnoreCase("form")) {
            try {
                urlConfigs.add(createFormUrlConfig(tempNode, context));
                inForm = true;
            } catch (MalformedURLException e) {
                inForm = false;
            }
        }
    } catch (Exception ex) {
        log.warn("Some bad HTML " + printNode(tempNode), ex);
    }

    NodeList childNodes = tempNode.getChildNodes();
    for (int x = 0; x < childNodes.getLength(); x++) {
        inForm =
            recurseForm(
                childNodes.item(x),
                urlConfigs,
                context,
                selectName,
                inForm);
    }
    return inForm;
}

/**
 * Returns the node value of the first child of <code>node</code>, or
 * the empty string when there is no child or the child has no value.
 */
private static String firstChildValue(Node node) {
    Node child = node.getFirstChild();
    if (child == null) {
        return "";
    }
    String value = child.getNodeValue();
    return value == null ? "" : value;
}
/**
 * Looks up an attribute by name and returns its value.
 * <p>
 * Any exception raised during the lookup (for instance because the map
 * is <code>null</code> or holds no such attribute) yields the empty
 * string.
 *
 * @param att the attribute map of a node
 * @param attName the name of the attribute to read
 * @return the attribute value, or "" if it could not be read
 */
private static String getAttributeValue(NamedNodeMap att, String attName) {
    String value;
    try {
        Node attribute = att.getNamedItem(attName);
        value = attribute.getNodeValue();
    } catch (Exception ex) {
        value = "";
    }
    return value;
}
/**
 * Renders a node as an opening tag, e.g.
 * <code>&lt;input name="q" value=""&gt;</code>, for use in log messages.
 * <p>
 * Nodes whose <code>getAttributes()</code> returns <code>null</code>
 * (the DOM API does so for anything that is not an element) are
 * rendered with the node name only, instead of failing with a
 * NullPointerException while an error is being logged.
 *
 * @param node the node to render
 * @return the node name and attributes in tag notation
 */
private static String printNode(Node node) {
    StringBuffer buf = new StringBuffer();
    buf.append("<");
    buf.append(node.getNodeName());
    NamedNodeMap atts = node.getAttributes();
    if (atts != null) {
        for (int x = 0; x < atts.getLength(); x++) {
            Node attribute = atts.item(x);
            buf.append(" ");
            buf.append(attribute.getNodeName());
            buf.append("=\"");
            buf.append(attribute.getNodeValue());
            buf.append("\"");
        }
    }
    buf.append(">");
    return buf.toString();
}
/**
 * JUnit tests for HtmlParsingUtils.
 *
 * @version $Revision: 1.1 $
 */
public static class Test extends TestCase
{
    transient private static Logger log = LoggingManager.getLoggerForClass();

    public Test(String name)
    {
        super(name);
    }

    /**
     * Checks the encoding name chosen for a 1.1 and for a later JDK by
     * temporarily overriding the <code>java.version</code> system
     * property.
     * <p>
     * The real property value is restored, and the cached encoding name
     * cleared, in a <code>finally</code> block so that a failing
     * assertion cannot leave the JVM reporting a fake version.
     */
    public void testGetUTFEncodingName()
    {
        log.debug("Start : testGetUTFEncodingName1");
        String javaVersion = System.getProperty("java.version");
        try
        {
            utfEncodingName = null;
            System.setProperty("java.version", "1.1");
            assertEquals("UTF8", HtmlParsingUtils.getUTFEncodingName());
            // need to clear utfEncodingName variable first 'cos
            // getUTFEncodingName checks to see if it's null
            utfEncodingName = null;
            System.setProperty("java.version", "1.2");
            assertEquals("UTF-8", HtmlParsingUtils.getUTFEncodingName());
        }
        finally
        {
            System.setProperty("java.version", javaVersion);
            // Force re-computation under the real version on next use.
            utfEncodingName = null;
        }
        log.debug("End : testGetUTFEncodingName1");
    }

    protected void setUp()
    {
    }
}
/**
 * Builds the sampler for a form node from its <code>action</code>
 * attribute, resolved through {@link #createUrlFromAnchor}.
 *
 * @param tempNode the form node
 * @param context the sampler describing the page containing the form
 * @return a sampler pointing at the form's action
 * @throws MalformedURLException if the node has no <code>action</code>
 *         attribute, or if the action cannot be turned into a URL
 */
private static HTTPSampler createFormUrlConfig(
        Node tempNode,
        HTTPSampler context)
        throws MalformedURLException {
    Node actionAttribute = tempNode.getAttributes().getNamedItem("action");
    if (actionAttribute == null) {
        throw new MalformedURLException();
    }
    return createUrlFromAnchor(actionAttribute.getNodeValue(), context);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: jmeter-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jmeter-dev-help@jakarta.apache.org