You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hc.apache.org by ol...@apache.org on 2019/02/02 20:49:40 UTC

[httpcomponents-core] branch HTTPCLIENT-1968 created (now 36b69bb)

This is an automated email from the ASF dual-hosted git repository.

olegk pushed a change to branch HTTPCLIENT-1968
in repository https://gitbox.apache.org/repos/asf/httpcomponents-core.git.


      at 36b69bb  HTTPCLIENT-1968: URIBuilder to split path component into path segments when digesting a URI

This branch includes the following new commits:

     new b235b66  HTTPCLIENT-1968: added utility methods to parse and format URI path segments
     new 36b69bb  HTTPCLIENT-1968: URIBuilder to split path component into path segments when digesting a URI

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[httpcomponents-core] 01/02: HTTPCLIENT-1968: added utility methods to parse and format URI path segments

Posted by ol...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

olegk pushed a commit to branch HTTPCLIENT-1968
in repository https://gitbox.apache.org/repos/asf/httpcomponents-core.git

commit b235b660a7ca1e2a852f8dcaa0441df814546abb
Author: Oleg Kalnichevski <ol...@apache.org>
AuthorDate: Sat Feb 2 18:02:33 2019 +0100

    HTTPCLIENT-1968: added utility methods to parse and format URI path segments
---
 .../org/apache/hc/core5/net/URLEncodedUtils.java   | 166 +++++++++++++++------
 .../apache/hc/core5/net/TestURLEncodedUtils.java   |  39 +++++
 2 files changed, 161 insertions(+), 44 deletions(-)

diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URLEncodedUtils.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URLEncodedUtils.java
index 338ba73..76fd01d 100644
--- a/httpcore5/src/main/java/org/apache/hc/core5/net/URLEncodedUtils.java
+++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URLEncodedUtils.java
@@ -33,7 +33,9 @@ import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.BitSet;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.hc.core5.http.NameValuePair;
@@ -57,21 +59,20 @@ public class URLEncodedUtils {
     private static final char QP_SEP_A = '&';
     private static final char QP_SEP_S = ';';
     private static final String NAME_VALUE_SEPARATOR = "=";
+    private static final char PATH_SEPARATOR = '/';
+
+    private static final BitSet PATH_SEPARATORS     = new BitSet(256);
+    static {
+        PATH_SEPARATORS.set(PATH_SEPARATOR);
+    }
 
     /**
-     * Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI
-     * of {@code http://example.org/path/to/file?a=1&b=2&c=3} would return a list of three NameValuePairs, one for a=1,
-     * one for b=2, and one for c=3. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
-     * <p>
-     * This is typically useful while parsing an HTTP PUT.
+     * Returns a list of URI query parameters as {@link NameValuePair}s.
+     * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
      *
-     * This API is currently only used for testing.
-     *
-     * @param uri
-     *        URI to parse
-     * @param charset
-     *        Charset to use while parsing the query
-     * @return a list of {@link NameValuePair} as built from the URI's query portion.
+     * @param uri input URI.
+     * @param charset parameter charset.
+     * @return list of query parameters.
      *
      * @since 4.5
      */
@@ -85,14 +86,12 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
-     * encoding. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
+     * Returns a list of URI query parameters as {@link NameValuePair}s.
+     * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
      *
-     * @param s
-     *            text to parse.
-     * @param charset
-     *            Encoding to use when decoding the parameters.
-     * @return a list of {@link NameValuePair} as built from the URI's query portion.
+     * @param s URI query component.
+     * @param charset charset to use when decoding the parameters.
+     * @return list of query parameters.
      *
      * @since 4.2
      */
@@ -104,22 +103,18 @@ public class URLEncodedUtils {
     }
 
     /**
-     * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using
-     * the given character encoding.
+     * Returns a list of parameters as {@link NameValuePair}s.
      *
-     * @param s
-     *            text to parse.
-     * @param charset
-     *            Encoding to use when decoding the parameters.
-     * @param separators
-     *            element separators.
-     * @return a list of {@link NameValuePair} as built from the URI's query portion.
+     * @param s input text.
+     * @param charset parameter charset.
+     * @param separators parameter separators.
+     * @return list of query parameters.
      *
      * @since 4.4
      */
     public static List<NameValuePair> parse(
             final CharSequence s, final Charset charset, final char... separators) {
-        Args.notNull(s, "Char array buffer");
+        Args.notNull(s, "Char sequence");
         final TokenParser tokenParser = TokenParser.INSTANCE;
         final BitSet delimSet = new BitSet();
         for (final char separator: separators) {
@@ -151,6 +146,98 @@ public class URLEncodedUtils {
         return list;
     }
 
+    static List<String> splitSegments(final CharSequence s, final BitSet separators) {
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        // Skip leading separator
+        if (cursor.atEnd()) {
+            return Collections.emptyList();
+        }
+        if (separators.get(s.charAt(cursor.getPos()))) {
+            cursor.updatePos(cursor.getPos() + 1);
+        }
+        final List<String> list = new ArrayList<>();
+        final StringBuilder buf = new StringBuilder();
+        for (;;) {
+            if (cursor.atEnd()) {
+                list.add(buf.toString());
+                break;
+            }
+            final char current = s.charAt(cursor.getPos());
+            if (separators.get(current)) {
+                list.add(buf.toString());
+                buf.setLength(0);
+            } else {
+                buf.append(current);
+            }
+            cursor.updatePos(cursor.getPos() + 1);
+        }
+        return list;
+    }
+
+    static List<String> splitPathSegments(final CharSequence s) {
+        return splitSegments(s, PATH_SEPARATORS);
+    }
+
+    /**
+     * Returns a list of URI path segments.
+     *
+     * @param s URI path component.
+     * @param charset parameter charset.
+     * @return list of segments.
+     *
+     * @since 4.5
+     */
+    public static List<String> parsePathSegments(final CharSequence s, final Charset charset) {
+        Args.notNull(s, "Char sequence");
+        final List<String> list = splitPathSegments(s);
+        for (int i = 0; i < list.size(); i++) {
+            list.set(i, urlDecode(list.get(i), charset != null ? charset : StandardCharsets.UTF_8, false));
+        }
+        return list;
+    }
+
+    /**
+     * Returns a list of URI path segments.
+     *
+     * @param s URI path component.
+     * @return list of segments.
+     *
+     * @since 4.5
+     */
+    public static List<String> parsePathSegments(final CharSequence s) {
+        return parsePathSegments(s, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Returns a string consisting of joined encoded path segments.
+     *
+     * @param segments the segments.
+     * @param charset parameter charset.
+     * @return URI path component
+     *
+     * @since 4.5
+     */
+    public static String formatSegments(final Iterable<String> segments, final Charset charset) {
+        Args.notNull(segments, "Segments");
+        final StringBuilder result = new StringBuilder();
+        for (final String segment : segments) {
+            result.append(PATH_SEPARATOR).append(urlEncode(segment, charset, PATHSAFE, false));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Returns a string consisting of joined encoded path segments.
+     *
+     * @param segments the segments.
+     * @return URI path component
+     *
+     * @since 4.5
+     */
+    public static String formatSegments(final String... segments) {
+        return formatSegments(Arrays.asList(segments), StandardCharsets.UTF_8);
+    }
+
     /**
      * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
      * list of parameters in an HTTP PUT or HTTP POST.
@@ -239,6 +326,8 @@ public class URLEncodedUtils {
      */
     private static final BitSet URLENCODER   = new BitSet(256);
 
+    private static final BitSet PATH_SPECIAL = new BitSet(256);
+
     static {
         // unreserved chars
         // alpha characters
@@ -276,9 +365,8 @@ public class URLEncodedUtils {
 
         // URL path safe
         PATHSAFE.or(UNRESERVED);
-        PATHSAFE.set('/'); // segment separator
         PATHSAFE.set(';'); // param separator
-        PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ ,
+        PATHSAFE.set(':'); // RFC 2396
         PATHSAFE.set('@');
         PATHSAFE.set('&');
         PATHSAFE.set('=');
@@ -286,6 +374,9 @@ public class URLEncodedUtils {
         PATHSAFE.set('$');
         PATHSAFE.set(',');
 
+        PATH_SPECIAL.or(PATHSAFE);
+        PATH_SPECIAL.set('/');
+
         RESERVED.set(';');
         RESERVED.set('/');
         RESERVED.set('?');
@@ -435,17 +526,4 @@ public class URLEncodedUtils {
         return urlEncode(content, charset, URIC, false);
     }
 
-    /**
-     * Encode a String using the {@link #PATHSAFE} set of characters.
-     * <p>
-     * Used by URIBuilder to encode path segments.
-     *
-     * @param content the string to encode, does not convert space to '+'
-     * @param charset the charset to use
-     * @return the encoded string
-     */
-    static String encPath(final String content, final Charset charset) {
-        return urlEncode(content, charset, PATHSAFE, false);
-    }
-
 }
diff --git a/httpcore5/src/test/java/org/apache/hc/core5/net/TestURLEncodedUtils.java b/httpcore5/src/test/java/org/apache/hc/core5/net/TestURLEncodedUtils.java
index f3c21fc..047cda3 100644
--- a/httpcore5/src/test/java/org/apache/hc/core5/net/TestURLEncodedUtils.java
+++ b/httpcore5/src/test/java/org/apache/hc/core5/net/TestURLEncodedUtils.java
@@ -30,10 +30,13 @@ package org.apache.hc.core5.net;
 import java.net.URI;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.hc.core5.http.NameValuePair;
 import org.apache.hc.core5.http.message.BasicNameValuePair;
+import org.hamcrest.CoreMatchers;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -95,6 +98,42 @@ public class TestURLEncodedUtils {
     }
 
     @Test
+    public void testParseSegments() throws Exception {
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("/this/that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this/that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "that", "")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/%2fthis%20and%20that"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "that", "/this and that")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("this///that//"),
+                CoreMatchers.equalTo(Arrays.asList("this", "", "", "that", "", "")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments("/"),
+                CoreMatchers.equalTo(Collections.singletonList("")));
+        Assert.assertThat(URLEncodedUtils.parsePathSegments(""),
+                CoreMatchers.equalTo(Collections.<String>emptyList()));
+    }
+
+    @Test
+    public void testFormatSegments() throws Exception {
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "that"),
+                CoreMatchers.equalTo("/this/that"));
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that"),
+                CoreMatchers.equalTo("/this//that"));
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that", "/this and that"),
+                CoreMatchers.equalTo("/this//that/%2Fthis%20and%20that"));
+        Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "", "that", "", ""),
+                CoreMatchers.equalTo("/this///that//"));
+        Assert.assertThat(URLEncodedUtils.formatSegments(""),
+                CoreMatchers.equalTo("/"));
+        Assert.assertThat(URLEncodedUtils.formatSegments(),
+                CoreMatchers.equalTo(""));
+    }
+
+    @Test
     public void testParseURLCodedContentString() throws Exception {
         List <NameValuePair> result;
 


[httpcomponents-core] 02/02: HTTPCLIENT-1968: URIBuilder to split path component into path segments when digesting a URI

Posted by ol...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

olegk pushed a commit to branch HTTPCLIENT-1968
in repository https://gitbox.apache.org/repos/asf/httpcomponents-core.git

commit 36b69bb15b3ecbd0770428a6abd0af96f6dad259
Author: Oleg Kalnichevski <ol...@apache.org>
AuthorDate: Sat Feb 2 21:36:43 2019 +0100

    HTTPCLIENT-1968: URIBuilder to split path component into path segments when digesting a URI
---
 .../java/org/apache/hc/core5/net/URIBuilder.java   | 53 +++++++++++++++++-----
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
index 01ab890..8264a80 100644
--- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
+++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
@@ -33,6 +33,8 @@ import java.net.UnknownHostException;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 
@@ -76,8 +78,8 @@ public class URIBuilder {
     private String encodedUserInfo;
     private String host;
     private int port;
-    private String path;
     private String encodedPath;
+    private List<String> pathSegments;
     private String encodedQuery;
     private List<NameValuePair> queryParams;
     private String query;
@@ -135,6 +137,13 @@ public class URIBuilder {
         return null;
     }
 
+    private List <String> parsePath(final String path, final Charset charset) {
+        if (path != null && !path.isEmpty()) {
+            return URLEncodedUtils.parsePathSegments(path, charset);
+        }
+        return null;
+    }
+
     /**
      * Builds a {@link URI} instance.
      */
@@ -170,8 +179,8 @@ public class URIBuilder {
             }
             if (this.encodedPath != null) {
                 sb.append(normalizePath(this.encodedPath, sb.length() == 0));
-            } else if (this.path != null) {
-                sb.append(encodePath(normalizePath(this.path, sb.length() == 0)));
+            } else if (this.pathSegments != null) {
+                sb.append(encodePath(this.pathSegments));
             }
             if (this.encodedQuery != null) {
                 sb.append("?").append(this.encodedQuery);
@@ -209,7 +218,7 @@ public class URIBuilder {
         this.encodedUserInfo = uri.getRawUserInfo();
         this.userInfo = uri.getUserInfo();
         this.encodedPath = uri.getRawPath();
-        this.path = uri.getPath();
+        this.pathSegments = parsePath(uri.getPath(), this.charset != null ? this.charset : StandardCharsets.UTF_8);
         this.encodedQuery = uri.getRawQuery();
         this.queryParams = parseQuery(uri.getRawQuery(), this.charset != null ? this.charset : StandardCharsets.UTF_8);
         this.encodedFragment = uri.getRawFragment();
@@ -220,8 +229,8 @@ public class URIBuilder {
         return URLEncodedUtils.encUserInfo(userInfo, this.charset != null ? this.charset : StandardCharsets.UTF_8);
     }
 
-    private String encodePath(final String path) {
-        return URLEncodedUtils.encPath(path, this.charset != null ? this.charset : StandardCharsets.UTF_8);
+    private String encodePath(final List<String> pathSegments) {
+        return URLEncodedUtils.formatSegments(pathSegments, this.charset != null ? this.charset : StandardCharsets.UTF_8);
     }
 
     private String encodeUrlForm(final List<NameValuePair> params) {
@@ -322,7 +331,19 @@ public class URIBuilder {
      * @return this.
      */
     public URIBuilder setPath(final String path) {
-        this.path = !TextUtils.isBlank(path) ? path : null;
+        this.pathSegments = path != null ? URLEncodedUtils.splitPathSegments(path) : null;
+        this.encodedSchemeSpecificPart = null;
+        this.encodedPath = null;
+        return this;
+    }
+
+    /**
+     * Sets URI path segments. The values are expected to be unescaped and may contain non-ASCII characters.
+     *
+     * @return this.
+     */
+    public URIBuilder setPathSegments(final String... pathSegments) {
+        this.pathSegments = pathSegments.length > 0 ? Arrays.asList(pathSegments) : null;
         this.encodedSchemeSpecificPart = null;
         this.encodedPath = null;
         return this;
@@ -518,7 +539,7 @@ public class URIBuilder {
      * @since 4.3
      */
     public boolean isOpaque() {
-        return this.path == null;
+        return this.pathSegments == null || this.pathSegments.isEmpty();
     }
 
     public String getScheme() {
@@ -537,13 +558,23 @@ public class URIBuilder {
         return this.port;
     }
 
+    public List<String> getPathSegments() {
+        return this.pathSegments != null ? new ArrayList<>(this.pathSegments) : Collections.<String>emptyList();
+    }
+
     public String getPath() {
-        return this.path;
+        if (this.pathSegments == null) {
+            return null;
+        }
+        final StringBuilder result = new StringBuilder();
+        for (final String segment : this.pathSegments) {
+            result.append('/').append(segment);
+        }
+        return result.toString();
     }
 
     public List<NameValuePair> getQueryParams() {
-        return this.queryParams != null ? new ArrayList<>(this.queryParams)
-                        : new ArrayList<NameValuePair>();
+        return this.queryParams != null ? new ArrayList<>(this.queryParams) : Collections.<NameValuePair>emptyList();
     }
 
     public String getFragment() {