You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jmeter-dev@jakarta.apache.org by se...@apache.org on 2006/05/25 03:08:07 UTC

svn commit: r409279 - in /jakarta/jmeter/branches/rel-2-1: bin/jmeter.properties build.xml src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java xdocs/changes.xml

Author: sebb
Date: Wed May 24 18:08:06 2006
New Revision: 409279

URL: http://svn.apache.org/viewvc?rev=409279&view=rev
Log:
Add support for using htmlparser version 1.6
Tidy build.xml (remove optional BSF stuff)

Added:
    jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java   (with props)
Modified:
    jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties
    jakarta/jmeter/branches/rel-2-1/build.xml
    jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml

Modified: jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties?rev=409279&r1=409278&r2=409279&view=diff
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties (original)
+++ jakarta/jmeter/branches/rel-2-1/bin/jmeter.properties Wed May 24 18:08:06 2006
@@ -318,6 +318,9 @@
 # Other parsers:
 #htmlParser.className=org.apache.jmeter.protocol.http.parser.JTidyHTMLParser
 #htmlParser.className=org.apache.jmeter.protocol.http.parser.RegexpHTMLParser
+#
+# Version 1.6 of htmlparser can be downloaded and used instead of the default htmlparser.jar
+#htmlParser.className=org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser16
 
 #---------------------------------------------------------------------------
 # Remote batching configuration

Modified: jakarta/jmeter/branches/rel-2-1/build.xml
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/build.xml?rev=409279&r1=409278&r2=409279&view=diff
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/build.xml (original)
+++ jakarta/jmeter/branches/rel-2-1/build.xml Wed May 24 18:08:06 2006
@@ -193,6 +193,7 @@
   <property name="src.jorphan" value="src/jorphan"/>
   <property name="src.ldap" value="src/protocol/ldap"/>
   <property name="src.htmlparser" value="src/htmlparser"/>
+  <property name="src.htmlparser16" value="src/htmlparser16"/>
   <property name="src.tcp" value="src/protocol/tcp"/>
   <property name="src.examples" value="src/examples"/>
   <property name="src.mail" value="src/protocol/mail"/>
@@ -219,6 +220,7 @@
     <pathelement location="${src.jorphan}"/>
     <pathelement location="${src.ldap}"/>
     <pathelement location="${src.htmlparser}"/>
+    <pathelement location="${src.htmlparser16}"/>
     <pathelement location="${src.tcp}"/>
     <pathelement location="${src.examples}"/>
     <pathelement location="${src.mail}"/>
@@ -242,6 +244,7 @@
   <property name="build.ldap" location="build/protocol/ldap"/>
   <property name="build.mail" location="build/protocol/mail"/>
   <property name="build.htmlparser" location="build/htmlparser"/>
+  <property name="build.htmlparser16" location="build/htmlparser16"/>
   <property name="build.tcp" location="build/protocol/tcp"/>
   <property name="build.examples" location="build/examples"/>
   <property name="build.monitor.components" location="build/monitor/components"/>
@@ -392,9 +395,10 @@
   -->
   
   <!-- Build classpath (includes the optional jar directory) -->
+  <!-- Exclude the htmlparser jars to prevent clashes between versions -->
   <path id="classpath">
-    <fileset dir="${lib.dir}" includes="*.jar"/>
-    <fileset dir="${lib.opt}" includes="*.jar"/>
+    <fileset dir="${lib.dir}" includes="*.jar" excludes="htmlparser*.jar"/>
+    <fileset dir="${lib.opt}" includes="*.jar" excludes="htmlparser*.jar"/>
   </path>
 
   <!-- Anakia classpath -->
@@ -446,8 +450,10 @@
     <available classname="bsh.Interpreter" property="beanshell.present">
       <classpath refid="classpath"/>
     </available>
-    <available classname="org.apache.bsf.BSFManager" property="bsf.present">
-      <classpath refid="classpath"/>
+    <available classname="org.htmlparser.Tag" property="htmlparser16.present">
+      <classpath>
+    	<fileset dir="${lib.opt}" includes="htmlparser1_6.jar"/>
+      </classpath>
     </available>
     <available classname="javax.jms.Message" property="jms.present">
       <classpath refid="classpath"/>
@@ -469,8 +475,8 @@
     <echo message="Classes for BeanShell support not found in classpath"/>
   </target>
 
-  <target name="bsf-message" depends="check-libs" unless="bsf.present">
-    <echo message="Classes for BSF support not found in classpath"/>
+  <target name="htmlparser16-message" depends="check-libs" unless="htmlparser16.present">
+    <echo message="Classes for HTMLParser 1.6 support not found in classpath"/>
   </target>
 
   <target name="jms-message" depends="check-libs" unless="jms.present">
@@ -478,7 +484,7 @@
   </target>
 
   <target name="report-missing-libs" 
-      depends="ssl-message,mail-message,beanshell-message,bsf-message,jms-message"
+      depends="ssl-message,mail-message,beanshell-message,htmlparser16-message,jms-message"
   />
 
   <!--
@@ -556,6 +562,21 @@
       </classpath>
     </javac>
   </target>
+
+  <target name="compile-htmlparser16" depends="compile-http,compile-htmlparser" description="Compile htmlparser 1.6 support" if="htmlparser16.present"> <!-- skipped unless the htmlparser16.present property is set -->
+    <mkdir dir="${build.htmlparser16}"/>
+    <javac srcdir="${src.htmlparser16}" destdir="${build.htmlparser16}" optimize="${optimize}" debug="on" source="${src.java.version}" target="${target.java.version}" deprecation="${deprecation}" encoding="${encoding}">
+      <include name="**/*.java"/>
+      <classpath>
+        <pathelement location="${build.jorphan}"/>
+        <pathelement location="${build.core}"/>
+        <pathelement location="${build.http}"/>
+      	<fileset dir="${lib.opt}" includes="htmlparser1_6.jar"/>
+        <pathelement location="${}"/> <!-- NOTE(review): "${}" names no property; looks like a leftover placeholder - confirm the intended path or remove this element -->
+        <path refid="classpath"/>
+      </classpath>
+    </javac>
+  </target>
   
   <target name="compile-tests" description="Compile test components only">
     <mkdir dir="${build.test}"/>
@@ -564,7 +585,6 @@
         <pathelement location="${build.jorphan}"/>
         <pathelement location="${build.core}"/>
         <pathelement location="${build.components}"/>
-        <pathelement location="${build.htmlparser}"/>
         <pathelement location="${build.http}"/>
         <pathelement location="${build.ftp}"/>
         <pathelement location="${build.functions}"/>
@@ -639,7 +659,6 @@
     <javac srcdir="${src.java}" destdir="${build.java}" source="${src.java.version}" optimize="${optimize}" debug="on" target="${target.java.version}" deprecation="${deprecation}" encoding="${encoding}">
       <include name="**/*.java"/>
       <exclude name="org/apache/jmeter/protocol/java/**/BeanShell*.java" unless="beanshell.present"/>
-      <exclude name="org/apache/jmeter/protocol/java/**/BSF*.java" unless="bsf.present"/>
       <classpath>
         <pathelement location="${build.jorphan}"/>
         <pathelement location="${build.core}"/>
@@ -686,7 +705,7 @@
     </javac>
   </target>
 
-  <target name="compile-protocols" depends="compile-http,compile-ftp,compile-jdbc,compile-java,compile-ldap,compile-mail,compile-tcp" description="Compile all protocol-specific components."/>
+  <target name="compile-protocols" depends="compile-http,compile-htmlparser16,compile-ftp,compile-jdbc,compile-java,compile-ldap,compile-mail,compile-tcp" description="Compile all protocol-specific components."/>
 
   <target name="compile-examples" depends="compile-jorphan,compile-core" description="Compile example components.">
     <mkdir dir="${build.examples}"/>
@@ -901,6 +920,18 @@
     	</manifest>
     </jar>
   	    
+    <!-- HtmlParserHTMLParser16 -->
+    <jar jarfile="${lib.dir}/htmlparserparser16.jar" manifest="MANIFEST">
+      <fileset dir="${build.htmlparser16}" includes="**/HtmlParserHTMLParser16.class"/>
+        <metainf dir="." includes="LICENSE,NOTICE"/>
+    	<manifest>
+           <attribute name="Built-By" value="${user.name}"/>
+           <attribute name="Implementation-Version" value="${jmeter.version}"/>
+           <attribute name="X-Compile-Source-JDK" value="${src.java.version}"/>
+           <attribute name="X-Compile-Target-JDK" value="${target.java.version}"/>
+    	</manifest>
+    </jar>
+  	  	    
     <!-- ftp -->
     <jar jarfile="${dest.jar}/ApacheJMeter_ftp.jar" manifest="MANIFEST">
       <fileset dir="${build.ftp}" includes="**/*.class" />
@@ -1106,7 +1137,7 @@
     <property name="jsse.present" value="assume"/>
     <property name="javamail.complete" value="assume"/>
     <property name="beanshell.present" value="assume"/>
-    <property name="bsf.present" value="assume"/>
+    <property name="htmlparser16.present" value="assume"/>
     <property name="jms.present" value="assume"/>
   </target>
 
@@ -1135,6 +1166,7 @@
     <include name="${lib.dir}/jorphan.jar"/>
     <include name="${lib.dir}/htmlparser.jar"/>
     <include name="${lib.dir}/htmlparserparser.jar"/>
+    <include name="${lib.dir}/htmlparserparser16.jar"/>
     <include name="${lib.dir}/junit/test.jar"/>
   </patternset>
 
@@ -1462,6 +1494,8 @@
     <delete file="${dest.jar.jmeter}/ApacheJMeter.jar"/>
     <delete quiet="true" file="${lib.dir}/jorphan.jar"/>
     <delete quiet="true" file="${lib.dir}/htmlparser.jar"/>
+    <delete quiet="true" file="${lib.dir}/htmlparserparser.jar"/>
+    <delete quiet="true" file="${lib.dir}/htmlparserparser16.jar"/>
     <delete quiet="true">
         <fileset dir="${dest.jar}"/>
     </delete>
@@ -1607,7 +1641,9 @@
    <java classname="org.apache.jorphan.test.AllTests" fork="yes" dir="${basedir}/bin">
       <classpath>
     	<fileset dir="${dest.jar}" includes="*.jar"/>
-	    <pathelement location="${build.test}"/>
+        <pathelement location="${build.test}"/>
+    	<fileset dir="${lib.dir}" includes="htmlparser.jar"/>
+    	<fileset dir="${lib.dir}" includes="htmlparserparser.jar"/>
     	<path refid="classpath"/>
       </classpath>
       <sysproperty key="java.awt.headless" value="${test.headless}"/>
@@ -1669,7 +1705,7 @@
 -->
     <java classname="org.htmlparser.tests.AllTests" fork="yes" failonerror="yes">
       <classpath>
-	    <pathelement location="${build.htmlparser}"/>
+	<pathelement location="${build.htmlparser}"/>
         <pathelement location="${lib.dir}/htmlparser.jar"/>
         <pathelement location="${junit.jar}"/>
         <pathelement location="${log-kit.jar}"/>
@@ -1677,25 +1713,5 @@
       <arg value="-text"/>
     </java>
   </target>
-
-    
-    <target name="get-beanshell">
-      <get src="http://www.beanshell.org/bsh-1.3.0.jar" 
-        dest="lib/bsh-1.3.0.jar" 
-        verbose="true" usetimestamp="true"/>
-    </target>
-
-    <target name="get-bsf">
-      <get src="http://cvs.apache.org/dist/jakarta/bsf/v2.3.0rc1/bin/bsf-bin-2.3.0.zip" 
-        dest="bsf-bin-2.3.0.zip" 
-        verbose="true" usetimestamp="true"/>
-        <!--
-        Does not appear to be possible to junk the path names in unzip,
-        so use a temporary directory and delete it.
-        -->
-        <unzip src="bsf-bin-2.3.0.zip" dest="tmp"/>
-        <move file="tmp/bsf-2.3.0/lib/bsf.jar" todir="lib"></move>
-        <delete dir="tmp"></delete>
-    </target>
   
 </project>

Added: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java?rev=409279&view=auto
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java (added)
+++ jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java Wed May 24 18:08:06 2006
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2006 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ */
+
+package org.apache.jmeter.protocol.http.parser;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+
+import org.apache.jorphan.logging.LoggingManager;
+import org.apache.log.Logger;
+import org.htmlparser.Node;
+import org.htmlparser.Parser;
+import org.htmlparser.Tag;
+import org.htmlparser.tags.AppletTag;
+import org.htmlparser.tags.BaseHrefTag;
+import org.htmlparser.tags.BodyTag;
+import org.htmlparser.tags.CompositeTag;
+import org.htmlparser.tags.FrameTag;
+import org.htmlparser.tags.ImageTag;
+import org.htmlparser.tags.InputTag;
+import org.htmlparser.tags.LinkTag;
+import org.htmlparser.tags.ScriptTag;
+import org.htmlparser.util.NodeIterator;
+import org.htmlparser.util.ParserException;
+
+/**
+ * HTMLParser implementation backed by the org.htmlparser library (logged as version 1.6).
+ * Walks the parsed tag tree and collects the URLs of embedded resources.
+ */
+class HtmlParserHTMLParser16 extends HTMLParser {
+    private static final Logger log = LoggingManager.getLoggerForClass();
+
+	protected HtmlParserHTMLParser16() {
+		super();
+        log.info("Using htmlparser version 1.6");
+	}
+	
+	protected boolean isReusable() { // NOTE(review): presumably tells HTMLParser the instance may be reused - confirm against the superclass
+		return true;
+	}
+
+	/*
+	 * Parses html, adds each embedded-resource URL found to urls and returns urls.iterator();
+	 * parser failures are rethrown as HTMLParseException. Presumably overrides the superclass method:
+	 * @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[],
+	 *      java.net.URL, URLCollection)
+	 */
+	public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) throws HTMLParseException {
+        if (log.isDebugEnabled()) log.debug("Parsing html of: " + baseUrl);
+
+        Parser htmlParser = null;
+
+		try {
+			String contents = new String(html); // NOTE(review): decodes with the platform default charset, not the page's declared encoding
+			htmlParser = new Parser();
+            htmlParser.setInputHTML(contents);
+		} catch (Exception e) {
+			throw new HTMLParseException(e);
+		}
+
+		// Now parse the DOM tree
+		try {
+			// we start to iterate through the elements
+			parseNodes(htmlParser.elements(), new URLPointer(baseUrl), urls);
+			log.debug("End   : parseNodes");
+		} catch (ParserException e) {
+			throw new HTMLParseException(e);
+		}
+
+		return urls.iterator();
+	}
+	
+    /*
+	 * Mutable holder for the base URL, so that a change made while handling a BaseHrefTag
+	 * is seen by the rest of the (recursive) traversal in parseNodes.
+	 */
+    private static class URLPointer {
+    	private URLPointer(URL newUrl) {
+    		url = newUrl;
+    	}
+    	private URL url;
+    }
+    
+    /**
+     * Recursively parse all nodes to pick up all URLs.
+     * @param e the nodes to be parsed
+     * @param baseUrl Base URL from which the HTML code was obtained
+     * @param urls URLCollection to which the URLs found are added
+     */
+    private void parseNodes(final NodeIterator e,
+    		final URLPointer baseUrl, final URLCollection urls) 
+        throws HTMLParseException, ParserException {
+        while(e.hasMoreNodes()) {
+            Node node = e.nextNode();
+            // a url is always in a Tag.
+            if (!(node instanceof Tag)) {
+                continue;
+            }
+            Tag tag = (Tag) node;
+            String tagname=tag.getTagName();
+            String binUrlStr = null;
+
+            // first we check to see if body tag has a
+            // background set
+            if (tag instanceof BodyTag) {
+                binUrlStr = tag.getAttribute(ATT_BACKGROUND);
+            } else if (tag instanceof BaseHrefTag) {
+                BaseHrefTag baseHref = (BaseHrefTag) tag;
+                String baseref = baseHref.getBaseUrl().toString();
+                try {
+                    if (!baseref.equals(""))// Bugzilla 30713
+                    {
+                        baseUrl.url = new URL(baseUrl.url, baseHref.getBaseUrl());
+                    }
+                } catch (MalformedURLException e1) {
+                    throw new HTMLParseException(e1);
+                }
+            } else if (tag instanceof ImageTag) {
+                ImageTag image = (ImageTag) tag;
+                binUrlStr = image.getImageURL();
+            } else if (tag instanceof AppletTag) {
+        		// look for applets
+
+        		// This will only work with an Applet .class file.
+        		// Ideally, this should be upgraded to work with Objects (IE)
+        		// and archives (.jar and .zip) files as well.
+                AppletTag applet = (AppletTag) tag;
+                binUrlStr = applet.getAppletClass();
+            } else if (tag instanceof InputTag) {
+                // we check the input tag type for image
+                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) {
+                    // then we need to download the binary
+                    binUrlStr = tag.getAttribute(ATT_SRC);
+                }
+            } else if (tag instanceof LinkTag) {
+                LinkTag link = (LinkTag) tag;
+                if (link.getChild(0) instanceof ImageTag) { // only the first child of the link is examined here
+                    ImageTag img = (ImageTag) link.getChild(0);
+                    binUrlStr = img.getImageURL();
+                }
+            } else if (tag instanceof ScriptTag) {
+                binUrlStr = tag.getAttribute(ATT_SRC);
+            } else if (tag instanceof FrameTag) {
+                binUrlStr = tag.getAttribute(ATT_SRC);
+            } else if (tagname.equalsIgnoreCase(TAG_EMBED)
+                || tagname.equalsIgnoreCase(TAG_BGSOUND)){
+                binUrlStr = tag.getAttribute(ATT_SRC);  
+            } else if (tagname.equalsIgnoreCase(TAG_LINK)) {
+                // Putting the string first means it works even if the attribute is null
+                if (STYLESHEET.equalsIgnoreCase(tag.getAttribute(ATT_REL))) {
+                    binUrlStr = tag.getAttribute(ATT_HREF);
+                }
+            } else { // any other tag: pick up its ATT_BACKGROUND attribute, if present
+                binUrlStr = tag.getAttribute(ATT_BACKGROUND);
+            }
+
+            if (binUrlStr != null) {
+                urls.addURL(binUrlStr, baseUrl.url);
+            }
+            // second, if the tag was a composite tag,
+            // recursively parse its children.
+            if (tag instanceof CompositeTag) {
+                CompositeTag composite = (CompositeTag) tag;
+                parseNodes(composite.elements(), baseUrl, urls);
+            }
+        }
+    }
+}

Propchange: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision

Propchange: jakarta/jmeter/branches/rel-2-1/src/htmlparser16/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser16.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml?rev=409279&r1=409278&r2=409279&view=diff
==============================================================================
--- jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml (original)
+++ jakarta/jmeter/branches/rel-2-1/xdocs/changes.xml Wed May 24 18:08:06 2006
@@ -104,6 +104,7 @@
 <li>Bug 37652 - support for Ajp Tomcat protocol</li>
 <li>Bug 39626 - Loading SOAP/XML-RPC requests from file</li>
 <li>Bug 39652 - Allow truncation of labels on AxisGraph</li>
+<li>Allow use of htmlparser 1.6</li>
 </ul>
 
 <h4>Bug fixes:</h4>



---------------------------------------------------------------------
To unsubscribe, e-mail: jmeter-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jmeter-dev-help@jakarta.apache.org