You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2014/05/29 22:07:11 UTC

[3/4] git commit: Yet another fix for URL processing - do not escape what is already escaped :)

Yet another fix for URL processing - do not escape what is already escaped :)


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/f773f840
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/f773f840
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/f773f840

Branch: refs/heads/master
Commit: f773f840e93766265e87038688f9d36d4fe7e939
Parents: 64ae99b
Author: Eugene Dzhurinsky <jd...@gmail.com>
Authored: Wed Dec 25 18:47:40 2013 -0500
Committer: Eugene Dzhurinsky <jd...@gmail.com>
Committed: Thu May 8 23:03:22 2014 -0400

----------------------------------------------------------------------
 .../apache/any23/http/DefaultHTTPClient.java    | 34 +++++++-------------
 .../apache/any23/source/HTTPDocumentSource.java |  3 +-
 2 files changed, 13 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/f773f840/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java b/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
index 967f59f..f533040 100644
--- a/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
+++ b/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
@@ -17,22 +17,16 @@
 
 package org.apache.any23.http;
 
-import org.apache.commons.httpclient.Header;
-import org.apache.commons.httpclient.HostConfiguration;
-import org.apache.commons.httpclient.HttpClient;
-import org.apache.commons.httpclient.HttpConnectionManager;
-import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.*;
 import org.apache.commons.httpclient.methods.GetMethod;
 import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 
 /**
  * Opens an {@link InputStream} on an HTTP URI. Is configured
@@ -43,6 +37,8 @@ import java.util.List;
  */
 public class DefaultHTTPClient implements HTTPClient {
 
+    private static final Pattern ESCAPED_PATTERN = Pattern.compile("%[0-9a-f]{2}",Pattern.CASE_INSENSITIVE);
+
     private final MultiThreadedHttpConnectionManager manager = new MultiThreadedHttpConnectionManager();
 
     private HTTPClientConfiguration configuration;
@@ -55,6 +51,10 @@ public class DefaultHTTPClient implements HTTPClient {
 
     private String contentType = null;
 
+    public static final boolean isUrlEncoded(String url) {
+        return ESCAPED_PATTERN.matcher(url).find();
+    }
+
     /**
      * Creates a {@link DefaultHTTPClient} instance already initialized
      *
@@ -86,22 +86,10 @@ public class DefaultHTTPClient implements HTTPClient {
             ensureClientInitialized();
             String uriStr;
             try {
-                URI uriObj = new URI(uri);
+                URI uriObj = new URI(uri, isUrlEncoded(uri));
                 // [scheme:][//authority][path][?query][#fragment]
-                final String path = uriObj.getPath();
-                final String query = uriObj.getQuery();
-                final String fragment = uriObj.getFragment();
-                uriStr = String.format(
-                        "%s://%s%s%s%s%s%s",
-                        uriObj.getScheme(),
-                        uriObj.getAuthority(),
-                        path,
-                        query == null ? "" : "?",
-                        query,
-                        fragment == null ? "" : "#",
-                        fragment != null ? URLEncoder.encode(fragment, "UTF-8") : ""
-                );
-            } catch (URISyntaxException e) {
+                uriStr = uriObj.toString();
+            } catch (URIException e) {
                 throw new IllegalArgumentException("Invalid URI string.", e);
             }
             method = new GetMethod(uriStr);

http://git-wip-us.apache.org/repos/asf/any23/blob/f773f840/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
index 709bf5a..61a1b2d 100644
--- a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
+++ b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
@@ -17,6 +17,7 @@
 
 package org.apache.any23.source;
 
+import org.apache.any23.http.DefaultHTTPClient;
 import org.apache.any23.http.HTTPClient;
 import org.apache.commons.httpclient.URI;
 import org.apache.commons.httpclient.URIException;
@@ -49,7 +50,7 @@ public class HTTPDocumentSource implements DocumentSource {
 
     private String normalize(String uri) throws URISyntaxException {
         try {
-            URI normalized = new URI(uri, false);
+            URI normalized = new URI(uri, DefaultHTTPClient.isUrlEncoded(uri));
             normalized.normalize();
             return normalized.toString();
         } catch (URIException e) {