You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2020/09/15 20:13:59 UTC

[couchdb] 02/16: Read attachment data outside the transaction

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch prototype/fdb-layer
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit e3b1c418ebc9735a46ef8e1f36d09d7023939372
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Aug 28 04:30:27 2020 -0400

    Read attachment data outside the transaction
    
    Previously the data was read from the parser in the transaction. If the
    transaction had to retry, for example, because of a conflict, the parser would
    have been drained and exited, resulting in the request failing with a 500
    "mp_parser noproc" error.
    
    Since FDB cannot handle transactions larger than 10MB, opt to read the
    attachment data into memory first, before the transaction starts.
---
 src/chttpd/src/chttpd_db.erl | 26 ++++++++++++++++++--------
 src/couch/src/couch_att.erl  | 20 ++++++++++++++++++--
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl
index ec4a1a4..b57010d 100644
--- a/src/chttpd/src/chttpd_db.erl
+++ b/src/chttpd/src/chttpd_db.erl
@@ -418,12 +418,13 @@ db_req(#httpd{method='POST', path_parts=[DbName]}=Req, Db) ->
         _ ->
             Doc1
     end,
-    DocId = Doc2#doc.id,
+    Doc3 = read_att_data(Doc2),
+    DocId = Doc3#doc.id,
     case chttpd:qs_value(Req, "batch") of
     "ok" ->
         % async_batching
         spawn(fun() ->
-                case catch(fabric2_db:update_doc(Db, Doc2, [])) of
+                case catch(fabric2_db:update_doc(Db, Doc3, [])) of
                 {ok, _} ->
                     chttpd_stats:incr_writes(),
                     ok;
@@ -443,7 +444,7 @@ db_req(#httpd{method='POST', path_parts=[DbName]}=Req, Db) ->
         % normal
         DocUrl = absolute_uri(Req, [$/, couch_util:url_encode(DbName),
             $/, couch_util:url_encode(DocId)]),
-        case fabric2_db:update_doc(Db, Doc2, []) of
+        case fabric2_db:update_doc(Db, Doc3, []) of
         {ok, NewRev} ->
             chttpd_stats:incr_writes(),
             HttpCode = 201;
@@ -1174,7 +1175,8 @@ db_doc_req(#httpd{method='POST'}=Req, Db, DocId) ->
     NewDoc = Doc#doc{
         atts = UpdatedAtts ++ OldAtts2
     },
-    case fabric2_db:update_doc(Db, NewDoc, []) of
+    NewDoc1 = read_att_data(NewDoc),
+    case fabric2_db:update_doc(Db, NewDoc1, []) of
     {ok, NewRev} ->
         chttpd_stats:incr_writes(),
         HttpCode = 201;
@@ -1218,8 +1220,8 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) ->
         case chttpd:qs_value(Req, "batch") of
         "ok" ->
             % batch
-            Doc = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)),
-
+            Doc0 = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)),
+            Doc = read_att_data(Doc0),
             spawn(fun() ->
                     case catch(fabric2_db:update_doc(Db, Doc, [])) of
                     {ok, _} ->
@@ -1479,7 +1481,8 @@ http_code_from_status(Status) ->
             200
     end.
 
-update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc, Options) ->
+update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc0, Options) ->
+    Doc = read_att_data(Doc0),
     case fabric2_db:update_doc(Db, Doc, Options) of
     {ok, NewRev} ->
         Accepted = false;
@@ -1766,9 +1769,10 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
     end,
 
     #doc{atts=Atts} = Doc,
-    DocEdited = Doc#doc{
+    DocEdited0 = Doc#doc{
         atts = NewAtt ++ [A || A <- Atts, couch_att:fetch(name, A) /= FileName]
     },
+    DocEdited = read_att_data(DocEdited0),
     case fabric2_db:update_doc(Db, DocEdited, []) of
     {ok, UpdatedRev} ->
         chttpd_stats:incr_writes(),
@@ -2240,3 +2244,9 @@ bulk_get_json_error(DocId, Rev, Error, Reason) ->
                              {<<"rev">>, Rev},
                              {<<"error">>, Error},
                              {<<"reason">>, Reason}]}}]}).
+
+
+read_att_data(#doc{} = Doc) ->
+    #doc{atts = Atts} = Doc,
+    Atts1 = lists:map(fun couch_att:read_data/1, Atts),
+    Doc#doc{atts = Atts1}.
diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl
index d41ab5b..b4c95e9 100644
--- a/src/couch/src/couch_att.erl
+++ b/src/couch/src/couch_att.erl
@@ -40,6 +40,7 @@
 
 -export([
     flush/3,
+    read_data/1,
     foldl/3,
     range_foldl/5,
     foldl_decode/3,
@@ -374,7 +375,14 @@ to_json(Att, OutputData, DataToFollow, ShowEncoding) ->
 
 
 flush(Db, DocId, Att1) ->
-    Att2 = read_data(fetch(data, Att1), Att1),
+    Data0 = fetch(data, Att1),
+    case {Data0, Db} of
+        {{follows, _, _}, #{tx := Tx}} when Tx =/= undefined ->
+            error(follows_cannot_be_used_in_a_transaction);
+        {_, #{}} ->
+            ok
+    end,
+    Att2 = read_data(Data0, Att1),
     [
         Data,
         AttLen,
@@ -419,6 +427,11 @@ flush(Db, DocId, Att1) ->
     end.
 
 
+read_data(Att) ->
+    Data = fetch(data, Att),
+    read_data(Data, Att).
+
+
 read_data({loc, #{}, _DocId, _AttId}, Att) ->
     % Attachment already written to fdb
     Att;
@@ -443,7 +456,10 @@ read_data({follows, Parser, Ref}, Att) ->
     end;
 
 read_data(Data, Att) when is_binary(Data) ->
-    Att;
+    case fetch(att_len, Att) of
+        undefined -> store(att_len, size(Data), Att);
+        Int when is_integer(Int) ->  Att
+    end;
 
 read_data(Fun, Att) when is_function(Fun) ->
     [AttName, AttLen, InMd5] = fetch([name, att_len, md5], Att),