You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jmeter.apache.org by pm...@apache.org on 2016/02/19 21:37:20 UTC
svn commit: r1731296 - in /jmeter/trunk:
src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java
xdocs/changes.xml
Author: pmouawad
Date: Fri Feb 19 20:37:20 2016
New Revision: 1731296
URL: http://svn.apache.org/viewvc?rev=1731296&view=rev
Log:
Bug 59036 - FormCharSetFinder : Use JSoup instead of deprecated HTMLParser
Bugzilla Id: 59036
Modified:
jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java
jmeter/trunk/xdocs/changes.xml
Modified: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java?rev=1731296&r1=1731295&r2=1731296&view=diff
==============================================================================
--- jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java (original)
+++ jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java Fri Feb 19 20:37:20 2016
@@ -20,17 +20,15 @@ package org.apache.jmeter.protocol.http.
import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jmeter.protocol.http.parser.HTMLParseException;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.jorphan.util.JOrphanUtils;
import org.apache.log.Logger;
-import org.apache.jmeter.protocol.http.parser.HTMLParseException;
-import org.htmlparser.Node;
-import org.htmlparser.Parser;
-import org.htmlparser.Tag;
-import org.htmlparser.tags.CompositeTag;
-import org.htmlparser.tags.FormTag;
-import org.htmlparser.util.NodeIterator;
-import org.htmlparser.util.ParserException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
/**
* A parser for html, to find the form tags, and their accept-charset value
@@ -39,10 +37,6 @@ import org.htmlparser.util.ParserExcepti
public class FormCharSetFinder {
private static final Logger log = LoggingManager.getLoggerForClass();
- static {
- log.info("Using htmlparser version: "+Parser.getVersion());
- }
-
public FormCharSetFinder() {
super();
}
@@ -61,75 +55,32 @@ public class FormCharSetFinder {
log.debug("Parsing html of: " + html);
}
- Parser htmlParser = null;
- try {
- htmlParser = new Parser();
- htmlParser.setInputHTML(html);
- } catch (Exception e) {
- throw new HTMLParseException(e);
- }
-
- // Now parse the DOM tree
- try {
- // we start to iterate through the elements
- parseNodes(htmlParser.elements(), formEncodings, pageEncoding);
- log.debug("End : parseNodes");
- } catch (ParserException e) {
- throw new HTMLParseException(e);
- }
- }
-
- /**
- * Recursively parse all nodes to pick up all form encodings
- *
- * @param e the nodes to be parsed
- * @param formEncodings the Map where we should add form encodings found
- * @param pageEncoding the encoding used for the page where the nodes are present
- */
- private void parseNodes(final NodeIterator e, Map<String, String> formEncodings, String pageEncoding)
- throws HTMLParseException, ParserException {
- while(e.hasMoreNodes()) {
- Node node = e.nextNode();
- // a url is always in a Tag.
- if (!(node instanceof Tag)) {
- continue;
- }
- Tag tag = (Tag) node;
-
- // Only check form tags
- if (tag instanceof FormTag) {
- // Find the action / form url
- String action = tag.getAttribute("action");
- String acceptCharSet = tag.getAttribute("accept-charset");
- if(action != null && action.length() > 0) {
- // We use the page encoding where the form resides, as the
- // default encoding for the form
- String formCharSet = pageEncoding;
- // Check if we found an accept-charset attribute on the form
- if(acceptCharSet != null) {
- String[] charSets = JOrphanUtils.split(acceptCharSet, ",");
- // Just use the first one of the possible many charsets
- if(charSets.length > 0) {
- formCharSet = charSets[0].trim();
- if(formCharSet.length() == 0) {
- formCharSet = null;
- }
+ Document document = Jsoup.parse(html);
+ Elements forms = document.select("form");
+ for (Element element : forms) {
+ String action = element.attr("action");
+ if( !(StringUtils.isEmpty(action)) ) {
+ // We use the page encoding where the form resides, as the
+ // default encoding for the form
+ String formCharSet = pageEncoding;
+ String acceptCharSet = element.attr("accept-charset");
+ // Check if we found an accept-charset attribute on the form
+ if(acceptCharSet != null) {
+ String[] charSets = JOrphanUtils.split(acceptCharSet, ",");
+ // Just use the first one of the possible many charsets
+ if(charSets.length > 0) {
+ formCharSet = charSets[0].trim();
+ if(formCharSet.length() == 0) {
+ formCharSet = null;
}
}
- if(formCharSet != null) {
- synchronized (formEncodings) {
- formEncodings.put(action, formCharSet);
- }
+ }
+ if(formCharSet != null) {
+ synchronized (formEncodings) {
+ formEncodings.put(action, formCharSet);
}
}
- }
-
- // second, if the tag was a composite tag,
- // recursively parse its children.
- if (tag instanceof CompositeTag) {
- CompositeTag composite = (CompositeTag) tag;
- parseNodes(composite.elements(), formEncodings, pageEncoding);
- }
+ }
}
}
}
Modified: jmeter/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jmeter/trunk/xdocs/changes.xml?rev=1731296&r1=1731295&r2=1731296&view=diff
==============================================================================
--- jmeter/trunk/xdocs/changes.xml (original)
+++ jmeter/trunk/xdocs/changes.xml Fri Feb 19 20:37:20 2016
@@ -113,6 +113,7 @@ Summary
<li><bug>57577</bug>HttpSampler : Retrieve All Embedded Resources, add property "httpsampler.embedded_resources_use_md5" to only compute md5 and not keep response data. Contributed by Benoit Wiart (benoit dot wiart at gmail.com)</li>
<li><bug>59023</bug>HttpSampler UI : rework the embedded resources labels and change default number of parallel downloads to 6. Contributed by Benoit Wiart (benoit dot wiart at gmail.com)</li>
<li><bug>59028</bug>Use SystemDefaultDnsResolver singleton. Contributed by Benoit Wiart (benoit dot wiart at gmail.com)</li>
+ <li><bug>59036</bug>FormCharSetFinder : Use JSoup instead of deprecated HTMLParser</li>
</ul>
<h3>Other samplers</h3>