You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jmeter.apache.org by pm...@apache.org on 2016/02/19 21:37:20 UTC

svn commit: r1731296 - in /jmeter/trunk: src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java xdocs/changes.xml

Author: pmouawad
Date: Fri Feb 19 20:37:20 2016
New Revision: 1731296

URL: http://svn.apache.org/viewvc?rev=1731296&view=rev
Log:
Bug 59036 - FormCharSetFinder : Use JSoup instead of deprecated HTMLParser 
Bugzilla Id: 59036

Modified:
    jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java
    jmeter/trunk/xdocs/changes.xml

Modified: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java?rev=1731296&r1=1731295&r2=1731296&view=diff
==============================================================================
--- jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java (original)
+++ jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/proxy/FormCharSetFinder.java Fri Feb 19 20:37:20 2016
@@ -20,17 +20,15 @@ package org.apache.jmeter.protocol.http.
 
 import java.util.Map;
 
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jmeter.protocol.http.parser.HTMLParseException;
 import org.apache.jorphan.logging.LoggingManager;
 import org.apache.jorphan.util.JOrphanUtils;
 import org.apache.log.Logger;
-import org.apache.jmeter.protocol.http.parser.HTMLParseException;
-import org.htmlparser.Node;
-import org.htmlparser.Parser;
-import org.htmlparser.Tag;
-import org.htmlparser.tags.CompositeTag;
-import org.htmlparser.tags.FormTag;
-import org.htmlparser.util.NodeIterator;
-import org.htmlparser.util.ParserException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 
 /**
  * A parser for html, to find the form tags, and their accept-charset value
@@ -39,10 +37,6 @@ import org.htmlparser.util.ParserExcepti
 public class FormCharSetFinder {
     private static final Logger log = LoggingManager.getLoggerForClass();
 
-    static {
-        log.info("Using htmlparser version: "+Parser.getVersion());
-    }
-
     public FormCharSetFinder() {
         super();
     }
@@ -61,75 +55,32 @@ public class FormCharSetFinder {
             log.debug("Parsing html of: " + html);
         }
 
-        Parser htmlParser = null;
-        try {
-            htmlParser = new Parser();
-            htmlParser.setInputHTML(html);
-        } catch (Exception e) {
-            throw new HTMLParseException(e);
-        }
-
-        // Now parse the DOM tree
-        try {
-            // we start to iterate through the elements
-            parseNodes(htmlParser.elements(), formEncodings, pageEncoding);
-            log.debug("End   : parseNodes");
-        } catch (ParserException e) {
-            throw new HTMLParseException(e);
-        }
-    }
-
-    /**
-     * Recursively parse all nodes to pick up all form encodings
-     *
-     * @param e the nodes to be parsed
-     * @param formEncodings the Map where we should add form encodings found
-     * @param pageEncoding the encoding used for the page where the nodes are present
-     */
-    private void parseNodes(final NodeIterator e, Map<String, String> formEncodings, String pageEncoding)
-        throws HTMLParseException, ParserException {
-        while(e.hasMoreNodes()) {
-            Node node = e.nextNode();
-            // a url is always in a Tag.
-            if (!(node instanceof Tag)) {
-                continue;
-            }
-            Tag tag = (Tag) node;
-
-            // Only check form tags
-            if (tag instanceof FormTag) {
-                // Find the action / form url
-                String action = tag.getAttribute("action");
-                String acceptCharSet = tag.getAttribute("accept-charset");
-                if(action != null && action.length() > 0) {
-                    // We use the page encoding where the form resides, as the
-                    // default encoding for the form
-                    String formCharSet = pageEncoding;
-                    // Check if we found an accept-charset attribute on the form
-                    if(acceptCharSet != null) {
-                        String[] charSets = JOrphanUtils.split(acceptCharSet, ",");
-                        // Just use the first one of the possible many charsets
-                        if(charSets.length > 0) {
-                            formCharSet = charSets[0].trim();
-                            if(formCharSet.length() == 0) {
-                                formCharSet = null;
-                            }
+        Document document = Jsoup.parse(html);
+        Elements forms = document.select("form");
+        for (Element element : forms) {
+            String action = element.attr("action");
+            if( !(StringUtils.isEmpty(action)) ) {
+                // We use the page encoding where the form resides, as the
+                // default encoding for the form
+                String formCharSet = pageEncoding;
+                String acceptCharSet = element.attr("accept-charset");
+                // Check if we found an accept-charset attribute on the form
+                if(acceptCharSet != null) {
+                    String[] charSets = JOrphanUtils.split(acceptCharSet, ",");
+                    // Just use the first one of the possible many charsets
+                    if(charSets.length > 0) {
+                        formCharSet = charSets[0].trim();
+                        if(formCharSet.length() == 0) {
+                            formCharSet = null;
                         }
                     }
-                    if(formCharSet != null) {
-                        synchronized (formEncodings) {
-                            formEncodings.put(action, formCharSet);
-                        }
+                }
+                if(formCharSet != null) {
+                    synchronized (formEncodings) {
+                        formEncodings.put(action, formCharSet);
                     }
                 }
-            }
-
-            // second, if the tag was a composite tag,
-            // recursively parse its children.
-            if (tag instanceof CompositeTag) {
-                CompositeTag composite = (CompositeTag) tag;
-                parseNodes(composite.elements(), formEncodings, pageEncoding);
-            }
+            }      
         }
     }
 }

Modified: jmeter/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jmeter/trunk/xdocs/changes.xml?rev=1731296&r1=1731295&r2=1731296&view=diff
==============================================================================
--- jmeter/trunk/xdocs/changes.xml (original)
+++ jmeter/trunk/xdocs/changes.xml Fri Feb 19 20:37:20 2016
@@ -113,6 +113,7 @@ Summary
     <li><bug>57577</bug>HttpSampler : Retrieve All Embedded Resources, add property "httpsampler.embedded_resources_use_md5" to only compute md5 and not keep response data. Contributed by Benoit Wiart (benoit dot wiart at gmail.com)</li>
     <li><bug>59023</bug>HttpSampler UI : rework the embedded resources labels and change default number of parallel downloads to 6. Contributed by Benoit Wiart (benoit dot wiart at gmail.com)</li>
     <li><bug>59028</bug>Use SystemDefaultDnsResolver singleton. Contributed by Benoit Wiart (benoit dot wiart at gmail.com)</li>
+    <li><bug>59036</bug>FormCharSetFinder : Use JSoup instead of deprecated HTMLParser</li>
 </ul>
 
 <h3>Other samplers</h3>