You are viewing a plain text version of this content; the canonical hyperlink was present in the original HTML version but is not reproduced here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2017/09/08 14:40:58 UTC

[couchdb] branch compactor-optimize-emsort updated (d1a8fc5 -> 4552cdb)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch compactor-optimize-emsort
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


    from d1a8fc5  Optimize couch_emsort writes
     new 3890387  ss - add parallel file ops
     new e1d7914  Revert "Optimize couch_emsort writes"
     new 3a5c77e  ss - couch_file bug fixes
     new 4552cdb  Use multi-IO operations in couch_file

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch/src/couch_db_updater.erl |  59 +++++++++-------
 src/couch/src/couch_emsort.erl     |  57 ++++-----------
 src/couch/src/couch_file.erl       | 139 ++++++++++++++++++++++++++++++-------
 3 files changed, 162 insertions(+), 93 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.

[couchdb] 03/04: ss - couch_file bug fixes

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch compactor-optimize-emsort
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 3a5c77e50cf260b5209e5dbeaf8631c6b8fd0b48
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Sep 8 09:40:26 2017 -0500

    ss - couch_file bug fixes
---
 src/couch/src/couch_file.erl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index e96e5c3..a8fcc6c 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -198,7 +198,7 @@ pread_binaries(Fd, PosList) ->
 pread_iolists(Fd, PosList) ->
     case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
         {ok, DataMd5s} ->
-            Data = lists:zipwith(fun({Pos, _}, {IoList, Md5}) ->
+            Data = lists:zipwith(fun(Pos, {IoList, Md5}) ->
                 verify_md5(Fd, Pos, IoList, Md5)
             end, PosList, DataMd5s),
             {ok, Data};
@@ -487,9 +487,9 @@ handle_call({pread_iolist, Pos}, _From, File) ->
 
 handle_call({pread_iolists, PosL}, _From, File) ->
     update_read_timestamp(),
-    LocNums = [{Pos, 4} || Pos <- PosL],
-    DataSizes = read_multi_raw_iolists_int(File, LocNums),
-    LocNums = lists:map(fun({LenIoList, NextPos}) ->
+    LocNums1 = [{Pos, 4} || Pos <- PosL],
+    DataSizes = read_multi_raw_iolists_int(File, LocNums1),
+    LocNums2 = lists:map(fun({LenIoList, NextPos}) ->
         case iolist_to_binary(LenIoList) of
             <<1:1/integer, Len:31/integer>> -> % an MD5-prefixed term
                 {NextPos, Len + 16};
@@ -497,13 +497,13 @@ handle_call({pread_iolists, PosL}, _From, File) ->
                 {NextPos, Len}
         end
     end, DataSizes),
-    {ok, Resps} = read_multi_raw_iolists_int(File, LocNums),
+    Resps = read_multi_raw_iolists_int(File, LocNums2),
     Extracted = lists:zipwith(fun({LenIoList, _}, {IoList, _}) ->
         case iolist_to_binary(LenIoList) of
-            <<1:1/integer, _/binary>> ->
+            <<1:1/integer, _:31/integer>> ->
                 {Md5, IoList} = extract_md5(IoList),
                 {IoList, Md5};
-            <<0:1/integer, _/binary>> ->
+            <<0:1/integer, _:31/integer>> ->
                 {IoList, <<>>}
         end
     end, DataSizes, Resps),

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.

[couchdb] 04/04: Use multi-IO operations in couch_file

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch compactor-optimize-emsort
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 4552cdbfdab187e589d9974e87716d60cf703b7d
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Sep 8 09:40:52 2017 -0500

    Use multi-IO operations in couch_file
---
 src/couch/src/couch_db_updater.erl | 54 +++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 226acb2..7fc344e 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -36,11 +36,12 @@
 }).
 
 -record(merge_st, {
+    src_fd,
     id_tree,
     seq_tree,
     curr,
     rem_seqs,
-    infos
+    locs
 }).
 
 init({DbName, Filepath, Fd, Options}) ->
@@ -1340,10 +1341,16 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
     {ok, SeqTree} = couch_btree:add_remove(
             NewDb#db.seq_tree, NewInfos, RemoveSeqs),
 
-    FDIKVs = lists:map(fun(#full_doc_info{id=Id, update_seq=Seq}=FDI) ->
-        {{Id, Seq}, FDI}
-    end, NewInfos),
-    {ok, IdEms} = couch_emsort:add(NewDb#db.id_tree, FDIKVs),
+    EMSortFd = couch_emsort:get_fd(NewDb#db.id_tree),
+    {ok, LocSizes} = couch_file:append_terms(EMSortFd, NewInfos),
+    EMSortEntries = lists:zipwith(fun(FDI, {Loc, _}) ->
+        #full_doc_info{
+            id = Id,
+            update_seq = Seq
+        } = FDI,
+        {{Id, Seq}, Loc}
+    end, NewInfos, LocSizes),
+    {ok, IdEms} = couch_emsort:add(NewDb#db.id_tree, EMSortEntries),
     update_compact_task(length(NewInfos)),
     NewDb#db{id_tree=IdEms, seq_tree=SeqTree}.
 
@@ -1455,6 +1462,7 @@ copy_meta_data(#comp_st{new_db = Db} = CompSt) ->
         header = Header
     } = Db,
     Src = Db#db.id_tree,
+    SrcFd = couch_emsort:get_fd(Src),
     DstState = couch_db_header:id_tree_state(Header),
     {ok, IdTree0} = couch_btree:open(DstState, Fd, [
         {split, fun ?MODULE:btree_by_id_split/1},
@@ -1463,14 +1471,16 @@ copy_meta_data(#comp_st{new_db = Db} = CompSt) ->
     ]),
     {ok, Iter} = couch_emsort:iter(Src),
     Acc0 = #merge_st{
+        src_fd=SrcFd,
         id_tree=IdTree0,
         seq_tree=Db#db.seq_tree,
         rem_seqs=[],
-        infos=[]
+        locs=[]
     },
     ?COMP_EVENT(md_copy_init),
     Acc = merge_docids(Iter, Acc0),
-    {ok, IdTree} = couch_btree:add(Acc#merge_st.id_tree, Acc#merge_st.infos),
+    {ok, Infos} = couch_file:pread_terms(SrcFd, Acc#merge_st.locs),
+    {ok, IdTree} = couch_btree:add(Acc#merge_st.id_tree, Infos),
     {ok, SeqTree} = couch_btree:add_remove(
         Acc#merge_st.seq_tree, [], Acc#merge_st.rem_seqs
     ),
@@ -1523,34 +1533,36 @@ verify_compaction(#comp_st{old_db = OldDb, new_db = NewDb} = CompSt) ->
     CompSt.
 
 
-merge_docids(Iter, #merge_st{infos=Infos}=Acc) when length(Infos) > 1000 ->
+merge_docids(Iter, #merge_st{locs=Locs}=Acc) when length(Locs) > 1000 ->
     #merge_st{
+        src_fd=SrcFd,
         id_tree=IdTree0,
         seq_tree=SeqTree0,
         rem_seqs=RemSeqs
     } = Acc,
+    {ok, Infos} = couch_file:pread_terms(SrcFd, Locs),
     {ok, IdTree1} = couch_btree:add(IdTree0, Infos),
     {ok, SeqTree1} = couch_btree:add_remove(SeqTree0, [], RemSeqs),
     Acc1 = Acc#merge_st{
         id_tree=IdTree1,
         seq_tree=SeqTree1,
         rem_seqs=[],
-        infos=[]
+        locs=[]
     },
     merge_docids(Iter, Acc1);
 merge_docids(Iter, #merge_st{curr=Curr}=Acc) ->
     case next_info(Iter, Curr, []) of
-        {NextIter, NewCurr, FDI, Seqs} ->
+        {NextIter, NewCurr, Loc, Seqs} ->
             Acc1 = Acc#merge_st{
-                infos = [FDI | Acc#merge_st.infos],
+                locs = [Loc | Acc#merge_st.locs],
                 rem_seqs = Seqs ++ Acc#merge_st.rem_seqs,
                 curr = NewCurr
             },
             ?COMP_EVENT(md_copy_row),
             merge_docids(NextIter, Acc1);
-        {finished, FDI, Seqs} ->
+        {finished, Loc, Seqs} ->
             Acc#merge_st{
-                infos = [FDI | Acc#merge_st.infos],
+                locs = [Loc | Acc#merge_st.locs],
                 rem_seqs = Seqs ++ Acc#merge_st.rem_seqs,
                 curr = undefined
             };
@@ -1561,19 +1573,19 @@ merge_docids(Iter, #merge_st{curr=Curr}=Acc) ->
 
 next_info(Iter, undefined, []) ->
     case couch_emsort:next(Iter) of
-        {ok, {{Id, Seq}, FDI}, NextIter} ->
-            next_info(NextIter, {Id, Seq, FDI}, []);
+        {ok, {{Id, Seq}, Loc}, NextIter} ->
+            next_info(NextIter, {Id, Seq, Loc}, []);
         finished ->
             empty
     end;
-next_info(Iter, {Id, Seq, FDI}, Seqs) ->
+next_info(Iter, {Id, Seq, Loc}, Seqs) ->
     case couch_emsort:next(Iter) of
-        {ok, {{Id, NSeq}, NFDI}, NextIter} ->
-            next_info(NextIter, {Id, NSeq, NFDI}, [Seq | Seqs]);
-        {ok, {{NId, NSeq}, NFDI}, NextIter} ->
-            {NextIter, {NId, NSeq, NFDI}, FDI, Seqs};
+        {ok, {{Id, NSeq}, NLoc}, NextIter} ->
+            next_info(NextIter, {Id, NSeq, NLoc}, [Seq | Seqs]);
+        {ok, {{NId, NSeq}, NLoc}, NextIter} ->
+            {NextIter, {NId, NSeq, NLoc}, Loc, Seqs};
         finished ->
-            {finished, FDI, Seqs}
+            {finished, Loc, Seqs}
     end.
 
 

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.

[couchdb] 02/04: Revert "Optimize couch_emsort writes"

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch compactor-optimize-emsort
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit e1d79147eb51e8c505fce2370609e3202443b182
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Sep 8 09:21:41 2017 -0500

    Revert "Optimize couch_emsort writes"
    
    This reverts commit d1a8fc58fabefc0ce75b7ca14df3ab27197cdf98.
---
 src/couch/src/couch_db_updater.erl |  5 ++--
 src/couch/src/couch_emsort.erl     | 57 ++++++++++----------------------------
 2 files changed, 16 insertions(+), 46 deletions(-)

diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index f3c79ce..226acb2 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -1403,9 +1403,8 @@ commit_compaction_data(#db{header=OldHeader}=Db0, Fd) ->
     % fd instead of the Filepath stuff that commit_data/2
     % does.
     DataState = couch_db_header:id_tree_state(OldHeader),
-    {ok, Ems} = couch_emsort:flush(Db0#db.id_tree),
-    MetaFd = couch_emsort:get_fd(Ems),
-    MetaState = couch_emsort:get_state(Ems),
+    MetaFd = couch_emsort:get_fd(Db0#db.id_tree),
+    MetaState = couch_emsort:get_state(Db0#db.id_tree),
     Db1 = bind_id_tree(Db0, Db0#db.fd, DataState),
     Header = db_to_header(Db1, OldHeader),
     CompHeader = #comp_header{
diff --git a/src/couch/src/couch_emsort.erl b/src/couch/src/couch_emsort.erl
index b70f8d3..2a25a23 100644
--- a/src/couch/src/couch_emsort.erl
+++ b/src/couch/src/couch_emsort.erl
@@ -129,14 +129,13 @@
 %     CA3                  CD3
 %
 
--export([open/1, open/2, get_fd/1, get_state/1, flush/1]).
+-export([open/1, open/2, get_fd/1, get_state/1]).
 -export([add/2, merge/1, sort/1, iter/1, next/1]).
 
 
 -record(ems, {
     fd,
     root,
-    curr_batch = [],
     bb_chunk = 10,
     chain_chunk = 100
 }).
@@ -164,35 +163,15 @@ get_fd(#ems{fd=Fd}) ->
     Fd.
 
 
-get_state(#ems{root=Root, curr_batch=[]}) ->
+get_state(#ems{root=Root}) ->
     Root.
 
 
 add(Ems, []) ->
     {ok, Ems};
 add(Ems, KVs) ->
-    #ems{
-        fd = Fd,
-        curr_batch = CurrBatch,
-        bb_chunk = BBChunk,
-        chain_chunk = ChainChunk
-    } = Ems,
-    KPs = write_values(Fd, KVs),
-    Limit = BBChunk * ChainChunk,
-    case length(KPs) + length(CurrBatch) > Limit of
-        true ->
-            flush_kps(Ems, KPs ++ CurrBatch);
-        false ->
-            {ok, Ems#ems{
-                curr_batch = KPs ++ CurrBatch
-            }}
-    end.
-
-
-flush(#ems{curr_batch=[]}=Ems) ->
-    {ok, Ems};
-flush(Ems) ->
-    flush_kps(Ems, Ems#ems.curr_batch).
+    Pos = write_kvs(Ems, KVs),
+    {ok, add_bb_pos(Ems, Pos)}.
 
 
 sort(#ems{}=Ems) ->
@@ -203,8 +182,7 @@ sort(#ems{}=Ems) ->
 merge(#ems{root=undefined}=Ems) ->
     {ok, Ems};
 merge(#ems{}=Ems) ->
-    {ok, FlushedEms} = flush(Ems),
-    {ok, decimate(FlushedEms)}.
+    {ok, decimate(Ems)}.
 
 
 iter(#ems{root=undefined}=Ems) ->
@@ -219,9 +197,8 @@ iter(#ems{root={_, _}}) ->
 next({_Ems, []}) ->
     finished;
 next({Ems, Chains}) ->
-    {{Key, Pos}, RestChains} = choose_kv(small, Ems, Chains),
-    {ok, Val} = couch_file:pread_term(Ems#ems.fd, Pos),
-    {ok, {Key, Val}, {Ems, RestChains}}.
+    {KV, RestChains} = choose_kv(small, Ems, Chains),
+    {ok, KV, {Ems, RestChains}}.
 
 
 add_bb_pos(#ems{root=undefined}=Ems, Pos) ->
@@ -231,22 +208,16 @@ add_bb_pos(#ems{root={BB, Prev}}=Ems, Pos) ->
     Ems#ems{root={NewBB, NewPrev}}.
 
 
-write_values(Fd, KVs) ->
-    {Keys, Vals} = lists:unzip(KVs),
-    {ok, PosSizes} = couch_file:append_terms(Fd, Vals),
-    {Pos, _} = lists:unzip(PosSizes),
-    lists:zip(Keys, Pos).
-
-
-flush_kps(#ems{fd=Fd}=Ems, KPs) ->
+write_kvs(Ems, KVs) ->
     % Write the list of KV's to disk in sorted order in chunks
     % of 100. Also make sure that the order is so that they
     % can be streamed in asscending order.
-    {LastKPs, LastPos} = lists:foldr(fun(KP, Acc) ->
-        append_item(Ems, Acc, KP, Ems#ems.chain_chunk)
-    end, {[], nil}, lists:sort(KPs)),
-    {ok, Final, _} = couch_file:append_term(Fd, {LastKPs, LastPos}),
-    {ok, add_bb_pos(Ems#ems{curr_batch = []}, Final)}.
+    {LastKVs, LastPos} =
+    lists:foldr(fun(KV, Acc) ->
+        append_item(Ems, Acc, KV, Ems#ems.chain_chunk)
+    end, {[], nil}, lists:sort(KVs)),
+    {ok, Final, _} = couch_file:append_term(Ems#ems.fd, {LastKVs, LastPos}),
+    Final.
 
 
 decimate(#ems{root={_BB, nil}}=Ems) ->

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.

[couchdb] 01/04: ss - add parallel file ops

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch compactor-optimize-emsort
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 3890387e3c315ea388a698ada9fc1042f05cd24e
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Sep 8 09:21:13 2017 -0500

    ss - add parallel file ops
---
 src/couch/src/couch_file.erl | 139 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 113 insertions(+), 26 deletions(-)

diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 4068872..e96e5c3 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -42,6 +42,7 @@
 -export([append_binary/2, append_binary_md5/2]).
 -export([append_raw_chunk/2, assemble_file_chunk/1, assemble_file_chunk/2]).
 -export([append_term/2, append_term/3, append_term_md5/2, append_term_md5/3]).
+-export([pread_terms/2, pread_binaries/2, pread_iolists/2]).
 -export([append_terms/2, append_terms/3, append_binaries/2]).
 -export([write_header/2, read_header/1]).
 -export([delete/2, delete/3, nuke_dir/2, init_delete_dir/1]).
@@ -120,15 +121,6 @@ append_term_md5(Fd, Term, Options) ->
     Comp = couch_util:get_value(compression, Options, ?DEFAULT_COMPRESSION),
     append_binary_md5(Fd, couch_compress:compress(Term, Comp)).
 
-append_terms(Fd, Terms) ->
-    append_terms(Fd, Terms, []).
-
-append_terms(Fd, Terms, Options) ->
-    Comp = couch_util:get_value(compression, Options, ?DEFAULT_COMPRESSION),
-    Bins = lists:map(fun(Term) ->
-        couch_compress:compress(Term, Comp)
-    end, Terms),
-    append_binaries(Fd, Bins).
 
 %%----------------------------------------------------------------------
 %% Purpose: To append an Erlang binary to the end of the file.
@@ -146,10 +138,6 @@ append_binary_md5(Fd, Bin) ->
         {append_bin, assemble_file_chunk(Bin, crypto:hash(md5, Bin))},
         erlang:get(io_priority)).
 
-append_binaries(Fd, Bins) ->
-    WriteBins = lists:map(fun assemble_file_chunk/1, Bins),
-    ioq:call(Fd, {append_bins, WriteBins}, erlang:get(io_priority)).
-
 append_raw_chunk(Fd, Chunk) ->
     ioq:call(Fd, {append_bin, Chunk}, erlang:get(io_priority)).
 
@@ -187,21 +175,55 @@ pread_binary(Fd, Pos) ->
 
 pread_iolist(Fd, Pos) ->
     case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of
-    {ok, IoList, <<>>} ->
-        {ok, IoList};
-    {ok, IoList, Md5} ->
-        case crypto:hash(md5, IoList) of
-        Md5 ->
-            {ok, IoList};
-        _ ->
-            couch_log:emergency("File corruption in ~p at position ~B",
-                     [Fd, Pos]),
-            exit({file_corruption, <<"file corruption">>})
-        end;
-    Error ->
-        Error
+        {ok, IoList, Md5} ->
+            {ok, verify_md5(Fd, Pos, IoList, Md5)};
+        Error ->
+            Error
     end.
 
+
+pread_terms(Fd, PosList) ->
+    {ok, Bins} = pread_binaries(Fd, PosList),
+    Terms = lists:map(fun(Bin) ->
+        couch_compress:decompress(Bin)
+    end, Bins),
+    {ok, Terms}.
+
+
+pread_binaries(Fd, PosList) ->
+    {ok, Data} = pread_iolists(Fd, PosList),
+    {ok, lists:map(fun erlang:iolist_to_binary/1, Data)}.
+
+
+pread_iolists(Fd, PosList) ->
+    case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
+        {ok, DataMd5s} ->
+            Data = lists:zipwith(fun({Pos, _}, {IoList, Md5}) ->
+                verify_md5(Fd, Pos, IoList, Md5)
+            end, PosList, DataMd5s),
+            {ok, Data};
+        Error ->
+            Error
+    end.
+
+
+append_terms(Fd, Terms) ->
+    append_terms(Fd, Terms, []).
+
+
+append_terms(Fd, Terms, Options) ->
+    Comp = couch_util:get_value(compression, Options, ?DEFAULT_COMPRESSION),
+    Bins = lists:map(fun(Term) ->
+        couch_compress:compress(Term, Comp)
+    end, Terms),
+    append_binaries(Fd, Bins).
+
+
+append_binaries(Fd, Bins) ->
+    WriteBins = lists:map(fun assemble_file_chunk/1, Bins),
+    ioq:call(Fd, {append_bins, WriteBins}, erlang:get(io_priority)).
+
+
 %%----------------------------------------------------------------------
 %% Purpose: The length of a file, in bytes.
 %% Returns: {ok, Bytes}
@@ -463,6 +485,30 @@ handle_call({pread_iolist, Pos}, _From, File) ->
         {reply, {ok, Iolist, <<>>}, File}
     end;
 
+handle_call({pread_iolists, PosL}, _From, File) ->
+    update_read_timestamp(),
+    LocNums = [{Pos, 4} || Pos <- PosL],
+    DataSizes = read_multi_raw_iolists_int(File, LocNums),
+    LocNums = lists:map(fun({LenIoList, NextPos}) ->
+        case iolist_to_binary(LenIoList) of
+            <<1:1/integer, Len:31/integer>> -> % an MD5-prefixed term
+                {NextPos, Len + 16};
+            <<0:1/integer, Len:31/integer>> ->
+                {NextPos, Len}
+        end
+    end, DataSizes),
+    {ok, Resps} = read_multi_raw_iolists_int(File, LocNums),
+    Extracted = lists:zipwith(fun({LenIoList, _}, {IoList, _}) ->
+        case iolist_to_binary(LenIoList) of
+            <<1:1/integer, _/binary>> ->
+                {Md5, IoList} = extract_md5(IoList),
+                {IoList, Md5};
+            <<0:1/integer, _/binary>> ->
+                {IoList, <<>>}
+        end
+    end, DataSizes, Resps),
+    {reply, {ok, Extracted}, File};
+
 handle_call(bytes, _From, #file{fd = Fd} = File) ->
     {reply, file:position(Fd, eof), File};
 
@@ -652,6 +698,31 @@ read_raw_iolist_int(#file{fd = Fd, pread_limit = Limit} = F, Pos, Len) ->
         {remove_block_prefixes(BlockOffset, RawBin), Size}
     end.
 
+
+read_multi_raw_iolists_int(#file{fd = Fd, pread_limit = Limit} = F, PosLens) ->
+    LocNums = lists:map(fun({Pos, Len}) ->
+        BlockOffset = Pos rem ?SIZE_BLOCK,
+        TotalBytes = calculate_total_read_len(BlockOffset, Len),
+        case Pos + TotalBytes of
+        Size when Size > F#file.eof ->
+            couch_stats:increment_counter([pread, exceed_eof]),
+            {_Fd, Filepath} = get(couch_file_fd),
+            throw({read_beyond_eof, Filepath});
+        Size when Size > Limit ->
+            couch_stats:increment_counter([pread, exceed_limit]),
+            {_Fd, Filepath} = get(couch_file_fd),
+            throw({exceed_pread_limit, Filepath, Limit});
+        _ ->
+            {Pos, TotalBytes}
+        end
+    end, PosLens),
+    {ok, Bins} = file:pread(Fd, LocNums),
+    lists:zipwith(fun({Pos, TotalBytes}, Bin) ->
+        <<RawBin:TotalBytes/binary>> = Bin,
+        {remove_block_prefixes(Pos rem ?SIZE_BLOCK, RawBin), Pos + TotalBytes}
+    end, LocNums, Bins).
+
+
 -spec extract_md5(iolist()) -> {binary(), iolist()}.
 extract_md5(FullIoList) ->
     {Md5List, IoList} = split_iolist(FullIoList, 16, []),
@@ -720,6 +791,22 @@ split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) ->
     split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]).
 
 
+
+verify_md5(_Fd, _Pos, IoList, <<>>) ->
+    IoList;
+
+verify_md5(Fd, Pos, IoList, Md5) ->
+    case crypto:hash(md5, IoList) of
+        Md5 -> IoList;
+        _ -> report_md5_error(Fd, Pos)
+    end.
+
+
+report_md5_error(Fd, Pos) ->
+    couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]),
+    exit({file_corruption, <<"file corruption">>}).
+
+
 % System dbs aren't monitored by couch_stats_process_tracker
 is_idle(#file{is_sys=true}) ->
     case process_info(self(), monitored_by) of

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.