You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2014/05/29 22:07:11 UTC
[3/4] git commit: Yet another fix for URL processing - do not escape
what is already escaped :)
Yet another fix for URL processing - do not escape what is already escaped :)
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/f773f840
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/f773f840
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/f773f840
Branch: refs/heads/master
Commit: f773f840e93766265e87038688f9d36d4fe7e939
Parents: 64ae99b
Author: Eugene Dzhurinsky <jd...@gmail.com>
Authored: Wed Dec 25 18:47:40 2013 -0500
Committer: Eugene Dzhurinsky <jd...@gmail.com>
Committed: Thu May 8 23:03:22 2014 -0400
----------------------------------------------------------------------
.../apache/any23/http/DefaultHTTPClient.java | 34 +++++++-------------
.../apache/any23/source/HTTPDocumentSource.java | 3 +-
2 files changed, 13 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/f773f840/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java b/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
index 967f59f..f533040 100644
--- a/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
+++ b/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
@@ -17,22 +17,16 @@
package org.apache.any23.http;
-import org.apache.commons.httpclient.Header;
-import org.apache.commons.httpclient.HostConfiguration;
-import org.apache.commons.httpclient.HttpClient;
-import org.apache.commons.httpclient.HttpConnectionManager;
-import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
/**
* Opens an {@link InputStream} on an HTTP URI. Is configured
@@ -43,6 +37,8 @@ import java.util.List;
*/
public class DefaultHTTPClient implements HTTPClient {
+ private static final Pattern ESCAPED_PATTERN = Pattern.compile("%[0-9a-f]{2}",Pattern.CASE_INSENSITIVE);
+
private final MultiThreadedHttpConnectionManager manager = new MultiThreadedHttpConnectionManager();
private HTTPClientConfiguration configuration;
@@ -55,6 +51,10 @@ public class DefaultHTTPClient implements HTTPClient {
private String contentType = null;
+ public static final boolean isUrlEncoded(String url) {
+ return ESCAPED_PATTERN.matcher(url).find();
+ }
+
/**
* Creates a {@link DefaultHTTPClient} instance already initialized
*
@@ -86,22 +86,10 @@ public class DefaultHTTPClient implements HTTPClient {
ensureClientInitialized();
String uriStr;
try {
- URI uriObj = new URI(uri);
+ URI uriObj = new URI(uri, isUrlEncoded(uri));
// [scheme:][//authority][path][?query][#fragment]
- final String path = uriObj.getPath();
- final String query = uriObj.getQuery();
- final String fragment = uriObj.getFragment();
- uriStr = String.format(
- "%s://%s%s%s%s%s%s",
- uriObj.getScheme(),
- uriObj.getAuthority(),
- path,
- query == null ? "" : "?",
- query,
- fragment == null ? "" : "#",
- fragment != null ? URLEncoder.encode(fragment, "UTF-8") : ""
- );
- } catch (URISyntaxException e) {
+ uriStr = uriObj.toString();
+ } catch (URIException e) {
throw new IllegalArgumentException("Invalid URI string.", e);
}
method = new GetMethod(uriStr);
http://git-wip-us.apache.org/repos/asf/any23/blob/f773f840/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
index 709bf5a..61a1b2d 100644
--- a/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
+++ b/core/src/main/java/org/apache/any23/source/HTTPDocumentSource.java
@@ -17,6 +17,7 @@
package org.apache.any23.source;
+import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.http.HTTPClient;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
@@ -49,7 +50,7 @@ public class HTTPDocumentSource implements DocumentSource {
private String normalize(String uri) throws URISyntaxException {
try {
- URI normalized = new URI(uri, false);
+ URI normalized = new URI(uri, DefaultHTTPClient.isUrlEncoded(uri));
normalized.normalize();
return normalized.toString();
} catch (URIException e) {