You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@couchdb.apache.org by "Robert Newson (JIRA)" <ji...@apache.org> on 2010/08/20 19:20:16 UTC

[jira] Created: (COUCHDB-864) multipart/related PUT's always close the connection.

multipart/related PUT's always close the connection.
----------------------------------------------------

                 Key: COUCHDB-864
                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
             Project: CouchDB
          Issue Type: Bug
          Components: Database Core
            Reporter: Robert Newson



I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.

This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).

The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.

---
 src/couchdb/couch_doc.erl      |    1 +
 src/couchdb/couch_httpd_db.erl |   13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
index 5009f8f..f8c874b 100644
--- a/src/couchdb/couch_doc.erl
+++ b/src/couchdb/couch_doc.erl
@@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
     Parser ! {get_doc_bytes, self()},
     receive 
     {doc_bytes, DocBytes} ->
+        erlang:put(mochiweb_request_recv, true),
         Doc = from_json_obj(?JSON_DECODE(DocBytes)),
         % go through the attachments looking for 'follows' in the data,
         % replace with function that reads the data from MIME stream.
diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
index b0fbe8d..eff7d67 100644
--- a/src/couchdb/couch_httpd_db.erl
+++ b/src/couchdb/couch_httpd_db.erl
@@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
     } = parse_doc_query(Req),
     couch_doc:validate_docid(DocId),
     
+    Len = couch_httpd:header_value(Req,"Content-Length"),
     Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
     RespHeaders = [{"Location", Loc}],
     case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
     ("multipart/related;" ++ _) = ContentType ->
         {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
-                fun() -> receive_request_data(Req) end),
+                fun() -> receive_request_data(Req, Len) end),
         Doc = couch_doc_from_req(Req, DocId, Doc0),
         update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
     _Else ->
@@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
     couch_httpd:send_chunk(Resp, <<"--">>),
     couch_httpd:last_chunk(Resp).
 
-receive_request_data(Req) ->
-    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
-    
+receive_request_data(Req, undefined) ->
+    receive_request_data(Req, 0);
+receive_request_data(Req, Len) when is_list(Len)->
+    Remaining = list_to_integer(Len),
+    Bin = couch_httpd:recv(Req, Remaining),
+    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
+
 update_doc_result_to_json({{Id, Rev}, Error}) ->
         {_Code, Err, Msg} = couch_httpd:error_info(Error),
         {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
-- 
1.7.2.2

Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Robert Newson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900798#action_12900798 ] 

Robert Newson commented on COUCHDB-864:
---------------------------------------


I should have been clearer that that was speculation. I was told that this code path is used when replicated documents with attachments. If true, then this bug must be affecting that also.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12901113#action_12901113 ] 

Filipe Manana commented on COUCHDB-864:
---------------------------------------

Hum, just added some logging code to mochiweb_request.erl where it decides whether to close or not the socket:

diff --git a/src/mochiweb/mochiweb_http.erl b/src/mochiweb/mochiweb_http.erl
index f1821f4..47c5a27 100644
--- a/src/mochiweb/mochiweb_http.erl
+++ b/src/mochiweb/mochiweb_http.erl
@@ -144,9 +144,11 @@ after_response(Body, Req) ->
     Socket = Req:get(socket),
     case Req:should_close() of
         true ->
+            error_logger:info_report(couch_info, {"~nCLOSING SOCKET~n", []}),
             gen_tcp:close(Socket),
             exit(normal);
         false ->
+            error_logger:info_report(couch_info, {"~nNOT CLOSING SOCKET~n", []}),
             Req:cleanup(),
             ?MODULE:loop(Socket, Body)
     end.

Then, without the previous patch applied to CouchDB (the one with the put(mochiweb_request_recv, true) in couch_httpd_db.erl) run the escript attached to this test.
I verified that mochiweb didn't close the socket immediately after the PUT request was served but only on the next receive loop iteration - which does a socket receive call with a timeout of 30 seconds.
So it seems it doesn't break an HTTP pipeline, unless consecutive requests take more than 30 seconds to be sent though the socket.

CouchDB's output:

[info] [<0.170.0>] 127.0.0.1 - - 'PUT' /testdb/doc1 201
[info] [<0.170.0>] 
NOT CLOSING SOCKET

The escript's output:

$ ./chunked.erl 
Received response:  <<"HTTP/1.1 201 Created\r\nServer: CouchDB/0.12.0ac533039-git (Erlang OTP/R13B)\r\nLocation: http://127.0.0.1:5984/testdb/doc1\r\nEtag: \"1-0e0b0a89b2adea5c71e7db2799f7f7c1\"\r\nDate: Sat, 21 Aug 2010 23:47:06 GMT\r\nContent-Type: text/plain;charset=utf-8\r\nContent-Length: 67\r\nCache-Control: must-revalidate\r\n\r\n{\"ok\":true,\"id\":\"doc1\",\"rev\":\"1-0e0b0a89b2adea5c71e7db2799f7f7c1\"}\n">>

Server closed connection after 30 seconds


Am I missing something?

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Robert Newson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12903053#action_12903053 ] 

Robert Newson commented on COUCHDB-864:
---------------------------------------

My concern is that only the process dictionary one-liner was necessary to allow many (thousands of) multipart/related PUT's on a stable connection. It still works with Filipe's patch, of course, but I think we should understand why it appears to work without.

It was revealed that latest mochiweb (the one with ssl) doesn't do unrecv any more, which was the previous way pipelining was thought to work.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl, mp_doc_put_http_pipeline.patch, mp_pipeline.patch
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Damien Katz (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12903044#action_12903044 ] 

Damien Katz commented on COUCHDB-864:
-------------------------------------

Last patch looks good. It's got my ok to check in to trunk and 1.0.x.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl, mp_doc_put_http_pipeline.patch, mp_pipeline.patch
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Robert Newson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12901082#action_12901082 ] 

Robert Newson commented on COUCHDB-864:
---------------------------------------

It's good but it's not enough. If all multipart/related PUT requests include a Content-Length header then it does work.But it fails badly for chunked encoding (messages are queued up and it blows up collect_results).



> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12901151#action_12901151 ] 

Filipe Manana commented on COUCHDB-864:
---------------------------------------

That test I did was without the line that could modify should_close/0 behaviour (the put into the process dictionary).

What I meant to demonstrate is that that line doesn't actually change the behaviour, should_close/0 returns false on both cases (with and without the patch).


> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Paul Joseph Davis (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Paul Joseph Davis updated COUCHDB-864:
--------------------------------------

    Skill Level: Regular Contributors Level (Easy to Medium)

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl, mp_doc_put_http_pipeline.patch, mp_pipeline.patch
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Filipe Manana updated COUCHDB-864:
----------------------------------

    Attachment: mp_doc_put_http_pipeline.patch

So, a tested patch that assure the PUT multipart/related doc API doesn't read more data than it needs (to avoid consuming data from a subsequent request in an HTTP pipeline) and to allow HTTP request with chunked transfer-encoding as well.
This HTTP pipeline issue is important for the replicator, since it uses this API very frequently.

Etap test included.

If no one has anything against this or the patch itself, I'll commit it in a few days.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl, mp_doc_put_http_pipeline.patch, mp_pipeline.patch
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Adam Kocoloski (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900787#action_12900787 ] 

Adam Kocoloski commented on COUCHDB-864:
----------------------------------------

Repeating a bit more of the IRC discussion, the process dictionary hack is needed because we spawn another process to actually receive the data off the socket.  mochi sets the mochiweb_request_recv flag in that process, but then checks for the presence of the flag in the connection-handling process when deciding whether to close.

I think the main problem with the patch as it exists is the use of couch_httpd:recv(Req, Remaining).  This will try to receive all remaining bytes off the socket in one go.  We should instead use recv(Req, 0), check if we read too many bytes, and then gen_tcp:unrecv the remainder.  The pattern for how to do this can be found in mochiweb_request:stream_unchunked_body()

Another minor issue is that if the content-length header is missing we potentially read too many bytes off the socket.  In that case the connection probably should be closed after the request.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Robert Newson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900809#action_12900809 ] 

Robert Newson commented on COUCHDB-864:
---------------------------------------

JIRA is probably not the easiest way to convey the gist of the discussion Adam and I had today.

The first thing to clarify is that there's no bug as such, just a (significant) inefficiency. Mochiweb closes the request and the http client works correctly when that happens.

I discovered the problem as I'm doing performance testing. I'm using nodeload (a node.js-based http benchmarking kit). I tell it to keep the connections alive. This works fine in my original test where I upload standalone attachments. When I switched to uploading multipart/related, so that I can set json metadata as well, it blew up with 'connection reset by peer'. Assuming, as one does, that it was a node.js bug, I tried the same upload in other ways and they all behaved the same way.

CouchDB should be able to keep the connections alive for multiple requests in most cases. It does so, as far as I can tell, in every case exception multipart/related PUT's. Having read the related code, and discussed it with Adam on IRC, it's clear why.

I'll post a further patch later on when I've had more time to tidy and test.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900797#action_12900797 ] 

Filipe Manana commented on COUCHDB-864:
---------------------------------------

The multipart parser in couch_httpd buffers the data it receives in excess after each event. When in need of more data it checks first the buffer and only if it doesn't have all the necessary data it calls the DataFun.

Robert, when you say "This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting). "

Can you provide some logs? For which branches does this happen?

Is it when just the target is a remote DB or when both the source and target are remote DBs?

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Adam Kocoloski (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900808#action_12900808 ] 

Adam Kocoloski commented on COUCHDB-864:
----------------------------------------

Hi Filipe, receiving excess data is an issue if the excess data belongs to the next request (e.g. a pipelined one) and is discarded.  That's one bug this ticket is attempting to solve.

The other bug (and really, a prerequisite for solving the pipelining bug) is that the connection is actually closed after every request.  You should be able to observe this by logging the result of should_close() in mochiweb_http:after_response()

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900804#action_12900804 ] 

Filipe Manana commented on COUCHDB-864:
---------------------------------------

Yes it's used when the target DB is remote and when we're PUTing docs with attachments to it.

However I don't think that receiving data in excess when calling recv is an issue. I would like either to be able to reproduce your issues (the ideal case) or at least see some logs.

After a fix in a patch that actually Adam committed on my behalf (trunk and 1.0.x) I don't see how a PUT of a doc with attachments to a remote DB might still cause issues. It caused an issue before if the first try failed, and subsequent retries would always fail (glad this was never in a release)

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Filipe Manana updated COUCHDB-864:
----------------------------------

    Attachment: chunked.erl

The escript test that does a PUT of document with a multipart content-type.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Robert Newson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12901147#action_12901147 ] 

Robert Newson commented on COUCHDB-864:
---------------------------------------

I meant to clarify that, yes, this one line change alters the return value of should_close(). The real question is whether you can then successfully send another multipart/related PUT, and I think the answer there is no for chunked encoding and yes for content-length (identity) encoding.

multipart/related PUT doesn't work as well as normal PUT's and I think it should. the receive_request_data() method is used only for these things and seemingly does not do as comprehensive a job as the more usual code.

I'm working on an etap test to demonstrate this problem.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12901079#action_12901079 ] 

Filipe Manana commented on COUCHDB-864:
---------------------------------------

Would the following patch be ok?

diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
index 87fc15d..769abe4 100644
--- a/src/couchdb/couch_httpd_db.erl
+++ b/src/couchdb/couch_httpd_db.erl
@@ -661,7 +661,9 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
         {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
                 fun() -> receive_request_data(Req) end),
         Doc = couch_doc_from_req(Req, DocId, Doc0),
-        update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
+        Result = update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType),
+        put(mochiweb_request_recv, true),
+        Result;
     _Else ->
         case couch_httpd:qs_value(Req, "batch") of
         "ok" ->


I tested adding a debug message of the result of calling MochiReq:should_close() and it returns false, also doesn't seem to close the socket in the tests I did.

I feel this is a bit hackish, that's the only downside for me.
Opinions are welcome

cheers

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Robert Newson (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900783#action_12900783 ] 

Robert Newson commented on COUCHDB-864:
---------------------------------------


Adam speculated that this might be related to another replicator bug;

"now i remember fdmanana occasionally complaining about logging messages regarding "too many bytes received in the replicator"

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Filipe Manana updated COUCHDB-864:
----------------------------------

    Attachment: mp_pipeline.patch

As I was discussing with Robert on IRC, the following patch is likely to fix the issue of HTTP pipelines and chunked transfer-encoding and identify transfer-encoding - where the issue is reading and discarding data from subsequent requests in the pipeline.

Hope Robert can test this with his node.js code and provide feedback.

Thanks Robert and Adam.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl, mp_pipeline.patch
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Adam Kocoloski (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12903059#action_12903059 ] 

Adam Kocoloski commented on COUCHDB-864:
----------------------------------------

That troubles me, too.  Pipelining should not have worked for very long.

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>         Attachments: chunked.erl, mp_doc_put_http_pipeline.patch, mp_pipeline.patch
>
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (COUCHDB-864) multipart/related PUT's always close the connection.

Posted by "Filipe Manana (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/COUCHDB-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12900835#action_12900835 ] 

Filipe Manana commented on COUCHDB-864:
---------------------------------------

Ah, not it's clear to me what the issue is! :)

I see 3 possible solutions:

1) The DataFun we pass to couch_doc:doc_from_multipart_stream would keep information about how many bytes it read from the socket (through a closure) and everytime it is invoked it would just read X bytes from the mochiweb socket. When the number of read bytes is bigger than the total length of the request, it would just do the unreceive trick of the data in excess;

2) Patch mochiweb to not close the connection and hope that all clients are well behaved in the sense that they close the connections themselves or close the connection after some inactivity period;

3) Change the replicator to use a direct HTTP connection for this case (through ibrowse:spawn_link_worker) - since the replicator has only 1 process that writes docs to the target it shouldn't be a problem (not too many http connections in parallel) however we wouldn't take advantage of HTTP pipelining for DBs where all or most docs have attachments


So 1) is possibly the most viable solution
 

> multipart/related PUT's always close the connection.
> ----------------------------------------------------
>
>                 Key: COUCHDB-864
>                 URL: https://issues.apache.org/jira/browse/COUCHDB-864
>             Project: CouchDB
>          Issue Type: Bug
>          Components: Database Core
>            Reporter: Robert Newson
>
> I noticed that mochiweb always closes the connection when doing a multipart/related PUT (to insert the JSON document and accompanying attachments in one call). Ultimately it's because we call recv(0) and not recv_body, thus consuming more data than we actually process. Mochiweb notices that there is unread data on the socket and closes the connection.
> This impacts replication with attachments, as I believe they go through this code path (and, thus, are forever reconnecting).
> The code below demonstrates a fix for this issue but isn't good enough for trunk. Adam provided the important process dictionary fix.
> ---
>  src/couchdb/couch_doc.erl      |    1 +
>  src/couchdb/couch_httpd_db.erl |   13 +++++++++----
>  2 files changed, 10 insertions(+), 4 deletions(-)
> diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
> index 5009f8f..f8c874b 100644
> --- a/src/couchdb/couch_doc.erl
> +++ b/src/couchdb/couch_doc.erl
> @@ -455,6 +455,7 @@ doc_from_multi_part_stream(ContentType, DataFun) ->
>      Parser ! {get_doc_bytes, self()},
>      receive 
>      {doc_bytes, DocBytes} ->
> +        erlang:put(mochiweb_request_recv, true),
>          Doc = from_json_obj(?JSON_DECODE(DocBytes)),
>          % go through the attachments looking for 'follows' in the data,
>          % replace with function that reads the data from MIME stream.
> diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
> index b0fbe8d..eff7d67 100644
> --- a/src/couchdb/couch_httpd_db.erl
> +++ b/src/couchdb/couch_httpd_db.erl
> @@ -651,12 +651,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
>      } = parse_doc_query(Req),
>      couch_doc:validate_docid(DocId),
>      
> +    Len = couch_httpd:header_value(Req,"Content-Length"),
>      Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)),
>      RespHeaders = [{"Location", Loc}],
>      case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of
>      ("multipart/related;" ++ _) = ContentType ->
>          {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType,
> -                fun() -> receive_request_data(Req) end),
> +                fun() -> receive_request_data(Req, Len) end),
>          Doc = couch_doc_from_req(Req, DocId, Doc0),
>          update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType);
>      _Else ->
> @@ -775,9 +776,13 @@ send_docs_multipart(Req, Results, Options) ->
>      couch_httpd:send_chunk(Resp, <<"--">>),
>      couch_httpd:last_chunk(Resp).
>  
> -receive_request_data(Req) ->
> -    {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}.
> -    
> +receive_request_data(Req, undefined) ->
> +    receive_request_data(Req, 0);
> +receive_request_data(Req, Len) when is_list(Len)->
> +    Remaining = list_to_integer(Len),
> +    Bin = couch_httpd:recv(Req, Remaining),
> +    {Bin, fun() -> receive_request_data(Req, Remaining - iolist_size(Bin)) end}.
> +
>  update_doc_result_to_json({{Id, Rev}, Error}) ->
>          {_Code, Err, Msg} = couch_httpd:error_info(Error),
>          {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)},
> -- 
> 1.7.2.2
> Umbra

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.