You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2017/09/19 15:27:40 UTC

[couchdb] branch master updated: Fix replication ID parsing in URL paths

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new c531a13  Fix replication ID parsing in URL paths
c531a13 is described below

commit c531a13b22cc6fcea6afb342eb3f5cb315db0313
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Tue Sep 19 10:16:41 2017 -0400

    Fix replication ID parsing in URL paths
    
    Previously users had to URL encode replication IDs when using the
    `_scheduler/jobs/<job_id>` endpoint because Mochiweb incorrectly decoded the
    `+` character in the URL path as a space. Users were therefore forced to
    encode the `+` (as `%2B`) so that the replicator would correctly receive a
    `+` after Mochiweb parsing.
    
    `+` is decoded as ` ` (space) probably because that is a valid
    application/x-www-form-urlencoded encoding in query strings, but that
    decoding applies only to query strings, not to URL paths.
    
    Notice RFC 3986 https://tools.ietf.org/html/rfc3986#section-2.2
    
    `+` is a `sub-delim` (a term from the RFC) and can be used unquoted as a
    delimiter in the path component.
    
    https://tools.ietf.org/html/rfc3986#section-3.3
    
    Indeed, the replication ID is a compound ID and `+` is a valid delimiter
    which separates the base part from the extensions.
    
    For more details see also:
    
    https://github.com/perwendel/spark/issues/490
    
    https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1
    
    Fixes #825
---
 src/couch_replicator/src/couch_replicator_ids.erl | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/couch_replicator/src/couch_replicator_ids.erl b/src/couch_replicator/src/couch_replicator_ids.erl
index 62cfdf2..e706762 100644
--- a/src/couch_replicator/src/couch_replicator_ids.erl
+++ b/src/couch_replicator/src/couch_replicator_ids.erl
@@ -78,7 +78,11 @@ replication_id(#rep{user_ctx = UserCtx} = Rep, 1) ->
 -spec convert([_] | binary() | {string(), string()}) -> {string(), string()}.
 convert(Id) when is_list(Id) ->
     convert(?l2b(Id));
-convert(Id) when is_binary(Id) ->
+convert(Id0) when is_binary(Id0) ->
+    % Spaces can result from mochiweb incorrectly unquoting + characters from
+    % the URL path. So undo the incorrect parsing here to avoid forcing
+    % users to url encode + characters.
+    Id = binary:replace(Id0, <<" ">>, <<"+">>, [global]),
     lists:splitwith(fun(Char) -> Char =/= $+ end, ?b2l(Id));
 convert({BaseId, Ext} = Id) when is_list(BaseId), is_list(Ext) ->
     Id.
@@ -222,6 +226,16 @@ get_non_default_port(_Schema, Port) ->
 
 -include_lib("eunit/include/eunit.hrl").
 
+
+replication_id_convert_test_() ->
+    [?_assertEqual(Expected, convert(Id)) || {Expected, Id} <- [
+        {{"abc", ""}, "abc"},
+        {{"abc", ""}, <<"abc">>},
+        {{"abc", "+x+y"}, <<"abc+x+y">>},
+        {{"abc", "+x+y"}, {"abc", "+x+y"}},
+        {{"abc", "+x+y"}, <<"abc x y">>}
+    ]].
+
 http_v4_endpoint_test_() ->
     [?_assertMatch({remote, User, Host, Port, Path, HeadersNoAuth, undefined},
         get_v4_endpoint(nil, #httpdb{url = Url, headers = Headers})) ||

-- 
To stop receiving notification emails like this one, please contact
['"commits@couchdb.apache.org" <co...@couchdb.apache.org>'].