You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jmeter-dev@jakarta.apache.org by se...@apache.org on 2006/05/25 03:08:07 UTC
svn commit: r409279 - in /jakarta/jmeter/branches/rel-2-1:
bin/jmeter.properties build.xml
src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
xdocs/changes.xml
Author: sebb
Date: Wed May 24 18:08:06 2006
New Revision: 409279
URL: http://svn.apache.org/viewvc?rev=409279&view=rev
Log:
Add support for using htmlparser version 1.6
Tidy build.xml (remove optional BSF stuff)
Added:
jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java (with props)
Modified:
jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties
jakarta/jmeter/branches/rel-2-1/build.xml
jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml
Modified: jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties?rev=409279&r1=409278&r2=409279&view=diff
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties (original)
+++ jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties Wed May 24 18:08:06 2006
@@ -318,6 +318,9 @@
# Other parsers:
#htmlParser.className=org.apache.jmeter.protocol.http.parser.JTidyHTMLParser
#htmlParser.className=org.apache.jmeter.protocol.http.parser.RegexpHTMLParser
+#
+# Version 1.6 of htmlparser can be downloaded and used instead of the default htmlparser.jar
+#htmlParser.className=org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser16
#---------------------------------------------------------------------------
# Remote batching configuration
Modified: jakarta/jmeter/branches/rel-2-1/build.xml
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/build.xml?rev=409279&r1=409278&r2=409279&view=diff
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/build.xml (original)
+++ jakarta/jmeter/branches/rel-2-1/build.xml Wed May 24 18:08:06 2006
@@ -193,6 +193,7 @@
<property name="src.jorphan" value="src/jorphan"/>
<property name="src.ldap" value="src/protocol/ldap"/>
<property name="src.htmlparser" value="src/htmlparser"/>
+ <property name="src.htmlparser16" value="src/htmlparser16"/>
<property name="src.tcp" value="src/protocol/tcp"/>
<property name="src.examples" value="src/examples"/>
<property name="src.mail" value="src/protocol/mail"/>
@@ -219,6 +220,7 @@
<pathelement location="${src.jorphan}"/>
<pathelement location="${src.ldap}"/>
<pathelement location="${src.htmlparser}"/>
+ <pathelement location="${src.htmlparser16}"/>
<pathelement location="${src.tcp}"/>
<pathelement location="${src.examples}"/>
<pathelement location="${src.mail}"/>
@@ -242,6 +244,7 @@
<property name="build.ldap" location="build/protocol/ldap"/>
<property name="build.mail" location="build/protocol/mail"/>
<property name="build.htmlparser" location="build/htmlparser"/>
+ <property name="build.htmlparser16" location="build/htmlparser16"/>
<property name="build.tcp" location="build/protocol/tcp"/>
<property name="build.examples" location="build/examples"/>
<property name="build.monitor.components" location="build/monitor/components"/>
@@ -392,9 +395,10 @@
-->
<!-- Build classpath (includes the optional jar directory) -->
+ <!-- Exclude the htmlparser jars to prevent clashes between versions -->
<path id="classpath">
- <fileset dir="${lib.dir}" includes="*.jar"/>
- <fileset dir="${lib.opt}" includes="*.jar"/>
+ <fileset dir="${lib.dir}" includes="*.jar" excludes="htmlparser*.jar"/>
+ <fileset dir="${lib.opt}" includes="*.jar" excludes="htmlparser*.jar"/>
</path>
<!-- Anakia classpath -->
@@ -446,8 +450,10 @@
<available classname="bsh.Interpreter" property="beanshell.present">
<classpath refid="classpath"/>
</available>
- <available classname="org.apache.bsf.BSFManager" property="bsf.present">
- <classpath refid="classpath"/>
+ <available classname="org.htmlparser.Tag" property="htmlparser16.present">
+ <classpath>
+ <fileset dir="${lib.opt}" includes="htmlparser1_6.jar"/>
+ </classpath>
</available>
<available classname="javax.jms.Message" property="jms.present">
<classpath refid="classpath"/>
@@ -469,8 +475,8 @@
<echo message="Classes for BeanShell support not found in classpath"/>
</target>
- <target name="bsf-message" depends="check-libs" unless="bsf.present">
- <echo message="Classes for BSF support not found in classpath"/>
+ <target name="htmlparser16-message" depends="check-libs" unless="htmlparser16.present">
+ <echo message="Classes for HTMLParser 1.6 support not found in classpath"/>
</target>
<target name="jms-message" depends="check-libs" unless="jms.present">
@@ -478,7 +484,7 @@
</target>
<target name="report-missing-libs"
- depends="ssl-message,mail-message,beanshell-message,bsf-message,jms-message"
+ depends="ssl-message,mail-message,beanshell-message,htmlparser16-message,jms-message"
/>
<!--
@@ -556,6 +562,21 @@
</classpath>
</javac>
</target>
+
+ <target name="compile-htmlparser16" depends="compile-http,compile-htmlparser" description="Compile htmlparser 1.6 support" if="htmlparser16.present"> <!-- Only runs when the htmlparser16.present property is set -->
+ <mkdir dir="${build.htmlparser16}"/>
+ <javac srcdir="${src.htmlparser16}" destdir="${build.htmlparser16}" optimize="${optimize}" debug="on" source="${src.java.version}" target="${target.java.version}" deprecation="${deprecation}" encoding="${encoding}">
+ <include name="**/*.java"/>
+ <classpath>
+ <pathelement location="${build.jorphan}"/>
+ <pathelement location="${build.core}"/>
+ <pathelement location="${build.http}"/>
+ <fileset dir="${lib.opt}" includes="htmlparser1_6.jar"/> <!-- Added explicitly: the shared "classpath" path excludes htmlparser*.jar -->
+ <pathelement location="${}"/> <!-- NOTE(review): "${}" names no property; looks like a leftover entry - confirm intent and remove or fill in -->
+ <path refid="classpath"/>
+ </classpath>
+ </javac>
+ </target>
<target name="compile-tests" description="Compile test components only">
<mkdir dir="${build.test}"/>
@@ -564,7 +585,6 @@
<pathelement location="${build.jorphan}"/>
<pathelement location="${build.core}"/>
<pathelement location="${build.components}"/>
- <pathelement location="${build.htmlparser}"/>
<pathelement location="${build.http}"/>
<pathelement location="${build.ftp}"/>
<pathelement location="${build.functions}"/>
@@ -639,7 +659,6 @@
<javac srcdir="${src.java}" destdir="${build.java}" source="${src.java.version}" optimize="${optimize}" debug="on" target="${target.java.version}" deprecation="${deprecation}" encoding="${encoding}">
<include name="**/*.java"/>
<exclude name="org/apache/jmeter/protocol/java/**/BeanShell*.java" unless="beanshell.present"/>
- <exclude name="org/apache/jmeter/protocol/java/**/BSF*.java" unless="bsf.present"/>
<classpath>
<pathelement location="${build.jorphan}"/>
<pathelement location="${build.core}"/>
@@ -686,7 +705,7 @@
</javac>
</target>
- <target name="compile-protocols" depends="compile-http,compile-ftp,compile-jdbc,compile-java,compile-ldap,compile-mail,compile-tcp" description="Compile all protocol-specific components."/>
+ <target name="compile-protocols" depends="compile-http,compile-htmlparser16,compile-ftp,compile-jdbc,compile-java,compile-ldap,compile-mail,compile-tcp" description="Compile all protocol-specific components."/>
<target name="compile-examples" depends="compile-jorphan,compile-core" description="Compile example components.">
<mkdir dir="${build.examples}"/>
@@ -901,6 +920,18 @@
</manifest>
</jar>
+ <!-- HtmlParserHTMLParser16 -->
+ <jar jarfile="${lib.dir}/htmlparserparser16.jar" manifest="MANIFEST">
+ <fileset dir="${build.htmlparser16}" includes="**/HtmlParserHTMLParser16.class"/>
+ <metainf dir="." includes="LICENSE,NOTICE"/>
+ <manifest>
+ <attribute name="Built-By" value="${user.name}"/>
+ <attribute name="Implementation-Version" value="${jmeter.version}"/>
+ <attribute name="X-Compile-Source-JDK" value="${src.java.version}"/>
+ <attribute name="X-Compile-Target-JDK" value="${target.java.version}"/>
+ </manifest>
+ </jar>
+
<!-- ftp -->
<jar jarfile="${dest.jar}/ApacheJMeter_ftp.jar" manifest="MANIFEST">
<fileset dir="${build.ftp}" includes="**/*.class" />
@@ -1106,7 +1137,7 @@
<property name="jsse.present" value="assume"/>
<property name="javamail.complete" value="assume"/>
<property name="beanshell.present" value="assume"/>
- <property name="bsf.present" value="assume"/>
+ <property name="htmlparser16.present" value="assume"/>
<property name="jms.present" value="assume"/>
</target>
@@ -1135,6 +1166,7 @@
<include name="${lib.dir}/jorphan.jar"/>
<include name="${lib.dir}/htmlparser.jar"/>
<include name="${lib.dir}/htmlparserparser.jar"/>
+ <include name="${lib.dir}/htmlparserparser16.jar"/>
<include name="${lib.dir}/junit/test.jar"/>
</patternset>
@@ -1462,6 +1494,8 @@
<delete file="${dest.jar.jmeter}/ApacheJMeter.jar"/>
<delete quiet="true" file="${lib.dir}/jorphan.jar"/>
<delete quiet="true" file="${lib.dir}/htmlparser.jar"/>
+ <delete quiet="true" file="${lib.dir}/htmlparserparser.jar"/>
+ <delete quiet="true" file="${lib.dir}/htmlparserparser16.jar"/>
<delete quiet="true">
<fileset dir="${dest.jar}"/>
</delete>
@@ -1607,7 +1641,9 @@
<java classname="org.apache.jorphan.test.AllTests" fork="yes" dir="${basedir}/bin">
<classpath>
<fileset dir="${dest.jar}" includes="*.jar"/>
- <pathelement location="${build.test}"/>
+ <pathelement location="${build.test}"/>
+ <fileset dir="${lib.dir}" includes="htmlparser.jar"/>
+ <fileset dir="${lib.dir}" includes="htmlparserparser.jar"/>
<path refid="classpath"/>
</classpath>
<sysproperty key="java.awt.headless" value="${test.headless}"/>
@@ -1669,7 +1705,7 @@
-->
<java classname="org.htmlparser.tests.AllTests" fork="yes" failonerror="yes">
<classpath>
- <pathelement location="${build.htmlparser}"/>
+ <pathelement location="${build.htmlparser}"/>
<pathelement location="${lib.dir}/htmlparser.jar"/>
<pathelement location="${junit.jar}"/>
<pathelement location="${log-kit.jar}"/>
@@ -1677,25 +1713,5 @@
<arg value="-text"/>
</java>
</target>
-
-
- <target name="get-beanshell">
- <get src="http://www.beanshell.org/bsh-1.3.0.jar"
- dest="lib/bsh-1.3.0.jar"
- verbose="true" usetimestamp="true"/>
- </target>
-
- <target name="get-bsf">
- <get src="http://cvs.apache.org/dist/jakarta/bsf/v2.3.0rc1/bin/bsf-bin-2.3.0.zip"
- dest="bsf-bin-2.3.0.zip"
- verbose="true" usetimestamp="true"/>
- <!--
- Does not appear to be possible to junk the path names in unzip,
- so use a temporary directory and delete it.
- -->
- <unzip src="bsf-bin-2.3.0.zip" dest="tmp"/>
- <move file="tmp/bsf-2.3.0/lib/bsf.jar" todir="lib"></move>
- <delete dir="tmp"></delete>
- </target>
</project>
Added: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java?rev=409279&view=auto
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java (added)
+++ jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java Wed May 24 18:08:06 2006
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2006 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.jmeter.protocol.http.parser;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+
+import org.apache.jorphan.logging.LoggingManager;
+import org.apache.log.Logger;
+import org.htmlparser.Node;
+import org.htmlparser.Parser;
+import org.htmlparser.Tag;
+import org.htmlparser.tags.AppletTag;
+import org.htmlparser.tags.BaseHrefTag;
+import org.htmlparser.tags.BodyTag;
+import org.htmlparser.tags.CompositeTag;
+import org.htmlparser.tags.FrameTag;
+import org.htmlparser.tags.ImageTag;
+import org.htmlparser.tags.InputTag;
+import org.htmlparser.tags.LinkTag;
+import org.htmlparser.tags.ScriptTag;
+import org.htmlparser.util.NodeIterator;
+import org.htmlparser.util.ParserException;
+
+/**
+ * HTMLParser implementation that collects the URLs of embedded resources using
+ * SourceForge's htmlparser library, version 1.6 (the org.htmlparser.* API).
+ */
+class HtmlParserHTMLParser16 extends HTMLParser {
+ private static final Logger log = LoggingManager.getLoggerForClass();
+
+ protected HtmlParserHTMLParser16() {
+ super();
+ log.info("Using htmlparser version 1.6");
+ }
+
+ protected boolean isReusable() {
+ return true;
+ }
+
+ /*
+ * Decodes html with the platform default charset, walks every tag and hands each embedded
+ * resource URL string found, together with the current base URL, to urls.addURL; returns
+ * urls.iterator(). Failures while setting up or running htmlparser are rethrown as HTMLParseException.
+ * NOTE(review): presumably implements the three-argument method declared in HTMLParser - confirm.
+ */
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) throws HTMLParseException {
+ if (log.isDebugEnabled()) log.debug("Parsing html of: " + baseUrl);
+
+ Parser htmlParser = null;
+
+ try {
+ String contents = new String(html);
+ htmlParser = new Parser();
+ htmlParser.setInputHTML(contents);
+ } catch (Exception e) {
+ throw new HTMLParseException(e);
+ }
+
+ // Now walk the parsed node tree
+ try {
+ // start iterating at the top-level elements; parseNodes recurses into children
+ parseNodes(htmlParser.elements(), new URLPointer(baseUrl), urls);
+ log.debug("End : parseNodes");
+ } catch (ParserException e) {
+ throw new HTMLParseException(e);
+ }
+
+ return urls.iterator();
+ }
+
+ /*
+ * Mutable holder for the current base URL, so that a BASE tag can replace it for all nodes parsed afterwards.
+ */
+ private static class URLPointer {
+ private URLPointer(URL newUrl) {
+ url = newUrl;
+ }
+ private URL url;
+ }
+
+ /**
+ * Recursively parses all nodes to pick up all URLs; a BASE tag with a non-empty href replaces baseUrl.url.
+ * @param e the nodes to be parsed
+ * @param baseUrl holder of the base URL from which the HTML code was obtained
+ * @param urls URLCollection that receives each URL string together with the current base URL
+ */
+ private void parseNodes(final NodeIterator e,
+ final URLPointer baseUrl, final URLCollection urls)
+ throws HTMLParseException, ParserException { // HTMLParseException: a BASE href could not be combined with the current base URL
+ while(e.hasMoreNodes()) {
+ Node node = e.nextNode();
+ // a url is always in a Tag, so any other kind of node is skipped.
+ if (!(node instanceof Tag)) {
+ continue;
+ }
+ Tag tag = (Tag) node;
+ String tagname=tag.getTagName();
+ String binUrlStr = null;
+
+ // first we check to see if body tag has a
+ // background set
+ if (tag instanceof BodyTag) {
+ binUrlStr = tag.getAttribute(ATT_BACKGROUND);
+ } else if (tag instanceof BaseHrefTag) {
+ BaseHrefTag baseHref = (BaseHrefTag) tag;
+ String baseref = baseHref.getBaseUrl().toString();
+ try {
+ if (!baseref.equals(""))// Bugzilla 30713
+ {
+ baseUrl.url = new URL(baseUrl.url, baseHref.getBaseUrl());
+ }
+ } catch (MalformedURLException e1) {
+ throw new HTMLParseException(e1);
+ }
+ } else if (tag instanceof ImageTag) {
+ ImageTag image = (ImageTag) tag;
+ binUrlStr = image.getImageURL();
+ } else if (tag instanceof AppletTag) {
+ // look for applets
+
+ // This will only work with an Applet .class file.
+ // Ideally, this should be upgraded to work with Objects (IE)
+ // and archives (.jar and .zip) files as well.
+ AppletTag applet = (AppletTag) tag;
+ binUrlStr = applet.getAppletClass();
+ } else if (tag instanceof InputTag) {
+ // we check the input tag type for image
+ if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) {
+ // then we need to download the binary
+ binUrlStr = tag.getAttribute(ATT_SRC);
+ }
+ } else if (tag instanceof LinkTag) {
+ LinkTag link = (LinkTag) tag;
+ if (link.getChild(0) instanceof ImageTag) { // only an image that is the first child is picked up here
+ ImageTag img = (ImageTag) link.getChild(0);
+ binUrlStr = img.getImageURL();
+ }
+ } else if (tag instanceof ScriptTag) {
+ binUrlStr = tag.getAttribute(ATT_SRC);
+ } else if (tag instanceof FrameTag) {
+ binUrlStr = tag.getAttribute(ATT_SRC);
+ } else if (tagname.equalsIgnoreCase(TAG_EMBED)
+ || tagname.equalsIgnoreCase(TAG_BGSOUND)){
+ binUrlStr = tag.getAttribute(ATT_SRC);
+ } else if (tagname.equalsIgnoreCase(TAG_LINK)) {
+ // Putting the string first means it works even if the attribute is null
+ if (STYLESHEET.equalsIgnoreCase(tag.getAttribute(ATT_REL))) {
+ binUrlStr = tag.getAttribute(ATT_HREF);
+ }
+ } else { // any other tag: use its ATT_BACKGROUND attribute, if present
+ binUrlStr = tag.getAttribute(ATT_BACKGROUND);
+ }
+
+ if (binUrlStr != null) {
+ urls.addURL(binUrlStr, baseUrl.url);
+ }
+ // second, if the tag was a composite tag,
+ // recursively parse its children (sharing the same baseUrl holder).
+ if (tag instanceof CompositeTag) {
+ CompositeTag composite = (CompositeTag) tag;
+ parseNodes(composite.elements(), baseUrl, urls);
+ }
+ }
+ }
+}
Propchange: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision
Propchange: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml?rev=409279&r1=409278&r2=409279&view=diff
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml (original)
+++ jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml Wed May 24 18:08:06 2006
@@ -104,6 +104,7 @@
<li>Bug 37652 - support for Ajp Tomcat protocol</li>
<li>Bug 39626 - Loading SOAP/XML-RPC requests from file</li>
<li>Bug 39652 - Allow truncation of labels on AxisGraph</li>
+<li>Allow use of htmlparser 1.6</li>
</ul>
<h4>Bug fixes:</h4>
---------------------------------------------------------------------
To unsubscribe, e-mail: jmeter-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jmeter-dev-help@jakarta.apache.org