You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@couchdb.apache.org by Andy Wenk <an...@nms.de> on 2014/02/11 09:13:31 UTC
Fwd: couch commit: updated refs/heads/2001-feature-external-size to d9d11a9
This is not a rant but a selfish wish :)
I wish all commit messages were as explanatory as the one below. As
I am not really deep into the Erlang code but would love to understand
more, a good way for me is to read all the commit messages. But mostly we
have one-liners. Reading the code is OK, but with an explanatory message it
is clearer.
Maybe you can keep this in mind when you write your next commit message, to
help me and others :)
Thanks a lot
Cheers
Andy
---------- Forwarded message ----------
From: <da...@apache.org>
Date: 10 February 2014 22:54
Subject: couch commit: updated refs/heads/2001-feature-external-size to
d9d11a9
To: commits@couchdb.apache.org
Updated Branches:
refs/heads/2001-feature-external-size [created] d9d11a943
Implement database external size calculations
This patch adds calculations to show the "external size" of a database
which is roughly a measure of how much disk space it would take to store
the contents of the database in flat files. It is used to calculate
rough compression ratios for capacity planning.
COUCHDB-2001
Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d9d11a94
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d9d11a94
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d9d11a94
Branch: refs/heads/2001-feature-external-size
Commit: d9d11a943e2815897ee5b5b3d1b1eef4b417c1fa
Parents: 09c6556
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Feb 10 14:03:47 2014 -0600
Committer: Paul J. Davis <pa...@gmail.com>
Committed: Mon Feb 10 15:32:21 2014 -0600
----------------------------------------------------------------------
include/couch_db.hrl | 5 +-
src/couch_btree.erl | 10 +-
src/couch_compress.erl | 10 ++
src/couch_db.erl | 56 +++++-----
src/couch_db_updater.erl | 237 ++++++++++++++++++++++++++----------------
5 files changed, 196 insertions(+), 122 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 2c015df..2ce5ebe 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -65,7 +65,7 @@
update_seq = 0,
deleted = false,
rev_tree = [],
- leafs_size = 0
+ sizes = {0, 0}
}).
-record(httpd,
@@ -251,6 +251,7 @@
deleted,
ptr,
seq,
- size = nil
+ sizes = {0, 0},
+ atts = []
}).
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_btree.erl
----------------------------------------------------------------------
diff --git a/src/couch_btree.erl b/src/couch_btree.erl
index 9caceb8..ac5681d 100644
--- a/src/couch_btree.erl
+++ b/src/couch_btree.erl
@@ -61,8 +61,8 @@ final_reduce(#btree{reduce=Reduce}, Val) ->
final_reduce(Reduce, Val);
final_reduce(Reduce, {[], []}) ->
Reduce(reduce, []);
-final_reduce(_Bt, {[], [Red]}) ->
- Red;
+final_reduce(Reduce, {[], [Red]}) ->
+ Reduce(rereduce, [Red]);
final_reduce(Reduce, {[], Reductions}) ->
Reduce(rereduce, Reductions);
final_reduce(Reduce, {KVs, Reductions}) ->
@@ -92,14 +92,14 @@ fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) ->
full_reduce(#btree{root=nil,reduce=Reduce}) ->
{ok, Reduce(reduce, [])};
-full_reduce(#btree{root=Root}) ->
- {ok, element(2, Root)}.
+full_reduce(#btree{root=Root, reduce=Reduce}) ->
+ {ok, Reduce(rereduce, [element(2, Root)])}.
size(#btree{root = nil}) ->
0;
size(#btree{root = {_P, _Red}}) ->
% pre 1.2 format
- nil;
+ undefined;
size(#btree{root = {_P, _Red, Size}}) ->
Size.
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_compress.erl
----------------------------------------------------------------------
diff --git a/src/couch_compress.erl b/src/couch_compress.erl
index 6b47a7a..49876d8 100644
--- a/src/couch_compress.erl
+++ b/src/couch_compress.erl
@@ -14,6 +14,7 @@
-export([compress/2, decompress/1, is_compressed/2]).
-export([get_compression_method/0]).
+-export([uncompressed_length/1]).
-include_lib("couch/include/couch_db.hrl").
@@ -82,3 +83,12 @@ is_compressed(<<?TERM_PREFIX, _/binary>>, Method) ->
is_compressed(Term, _Method) when not is_binary(Term) ->
false.
+
+uncompressed_length(<<?SNAPPY_PREFIX, _/binary>> = Bin) ->
+ snappy:uncompressed_length(Bin);
+uncompressed_length(<<?COMPRESSED_TERM_PREFIX, _/binary>> = Bin) ->
+ <<131, 80, Size:4/big-unsigned-integer-unit:8, _/binary>> = Bin,
+ Size;
+uncompressed_length(<<?TERM_PREFIX, _/binary>> = Bin) ->
+ size(Bin).
+
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 32a0049..6432e8e 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -302,44 +302,48 @@ get_db_info(Db) ->
name=Name,
instance_start_time=StartTime,
committed_update_seq=CommittedUpdateSeq,
- id_tree = IdBtree,
- seq_tree = SeqBtree,
- local_tree = LocalBtree
+ id_tree = IdBtree
} = Db,
- {ok, Size} = couch_file:bytes(Fd),
- {ok, DbReduction} = couch_btree:full_reduce(IdBtree),
+ {ok, FileSize} = couch_file:bytes(Fd),
+ {ok, {Count, DelCount, Sizes}} = couch_btree:full_reduce(IdBtree),
+ {ActiveSize0, ExternalSize} = Sizes,
+ ActiveSize = active_size(Db, ActiveSize0),
InfoList = [
{db_name, Name},
- {doc_count, element(1, DbReduction)},
- {doc_del_count, element(2, DbReduction)},
+ {doc_count, Count},
+ {doc_del_count, DelCount},
{update_seq, SeqNum},
{purge_seq, couch_db:get_purge_seq(Db)},
{compact_running, Compactor/=nil},
- {disk_size, Size},
- {data_size, db_data_size(DbReduction, [SeqBtree, IdBtree, LocalBtree])},
+ {disk_size, FileSize},
+ {data_size, ActiveSize},
+ {sizes, {[
+ {file, FileSize},
+ {active, ActiveSize},
+ {external, ExternalSize}
+ ]}},
{instance_start_time, StartTime},
{disk_format_version, DiskVersion},
{committed_update_seq, CommittedUpdateSeq}
],
{ok, InfoList}.
-db_data_size({_Count, _DelCount}, _Trees) ->
- % pre 1.2 format, upgraded on compaction
- null;
-db_data_size({_Count, _DelCount, nil}, _Trees) ->
- null;
-db_data_size({_Count, _DelCount, DocAndAttsSize}, Trees) ->
- sum_tree_sizes(DocAndAttsSize, Trees).
-
-sum_tree_sizes(Acc, []) ->
- Acc;
-sum_tree_sizes(Acc, [T | Rest]) ->
- case couch_btree:size(T) of
- nil ->
- null;
- Sz ->
- sum_tree_sizes(Acc + Sz, Rest)
- end.
+active_size(#db{}=Db, DocActiveSize) ->
+ Trees = [
+ Db#db.id_tree,
+ Db#db.seq_tree,
+ Db#db.local_tree
+ ],
+ lists:foldl(fun(T, Acc) ->
+ case couch_btree:size(T) of
+ _ when Acc == null ->
+ null;
+ undefined ->
+ null;
+ Size ->
+ Acc + Size
+ end
+ end, DocActiveSize, Trees).
get_design_docs(#db{name = <<"shards/", _:18/binary, DbName/binary>>}) ->
{_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName))
end),
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 649826a..eb75177 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -342,40 +342,60 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
end.
rev_tree(DiskTree) ->
- couch_key_tree:mapfold(fun
- (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
+ couch_key_tree:map(fun
+ (_RevId, {Del, Ptr, Seq}) ->
% pre 1.2 format, will be upgraded on compaction
- {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
seq=UpdateSeq}, nil};
- (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
- {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
seq=UpdateSeq}, Acc};
- (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
- Acc2 = sum_leaf_sizes(Acc, Size),
- {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
seq=UpdateSeq, size=Size}, Acc2};
- (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
- {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
seq=UpdateSeq, size=Size}, Acc};
- (_RevId, ?REV_MISSING, _Type, Acc) ->
- {?REV_MISSING, Acc}
- end, 0, DiskTree).
+ #leaf{deleted=?i2b(Del), ptr=Ptr, seq=Seq};
+ (_RevId, {Del, Ptr, Seq, Size}) ->
+ % Pre-bigcouch format, will be upgraded on compaction
+ #leaf{
+ deleted = ?i2b(Del),
+ ptr = Ptr,
+ seq = Seq,
+ sizes = {Size, 0},
+ atts = []
+ };
+ (_RevId, {Del, Ptr, Seq, Sizes, Atts}) ->
+ #leaf{
+ deleted = ?i2b(Del),
+ ptr = Ptr,
+ seq = Seq,
+ sizes = Sizes,
+ atts = Atts
+ };
+ (_RevId, ?REV_MISSING) ->
+ ?REV_MISSING
+ end, DiskTree).
disk_tree(RevTree) ->
couch_key_tree:map(fun
(_RevId, ?REV_MISSING) ->
?REV_MISSING;
- (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq,
size=Size}) ->
- {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
+ (_RevId, #leaf{}=Leaf) ->
+ #leaf{
+ deleted = Del,
+ ptr = Ptr,
+ seq = Seq,
+ sizes = Sizes,
+ atts = Atts
+ } = Leaf,
+ {?b2i(Del), Ptr, Seq, upgrade_sizes(Sizes), Atts}
end, RevTree).
+upgrade_sizes({_, _} = Sizes) ->
+ Sizes;
+upgrade_sizes(S) when is_integer(S) ->
+ {S, 0}.
+
btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del,
rev_tree=T}) ->
{Seq, {Id, ?b2i(Del), disk_tree(T)}}.
btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
- {RevTree, LeafsSize} = rev_tree(DiskTree),
#full_doc_info{
id = Id,
update_seq = Seq,
deleted = ?i2b(Del),
- rev_tree = RevTree,
- leafs_size = LeafsSize
+ rev_tree = rev_tree(DiskTree)
};
btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
% Older versions stored #doc_info records in the seq_tree.
@@ -389,49 +409,59 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
[#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
{Rev, Seq, Bp} <- DeletedRevInfos]}.
-btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
- deleted=Deleted, rev_tree=Tree}) ->
- {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
+btree_by_id_split(#full_doc_info{}=Info) ->
+ #full_doc_info{
+ id = Id,
+ update_seq = Seq,
+ deleted = Del,
+ sizes = Sizes,
+ rev_tree = Tree
+ } = Info,
+ {Id, {Seq, ?b2i(Del), upgrade_sizes(Sizes), disk_tree(Tree)}}.
btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
- {Tree, LeafsSize} = rev_tree(DiskTree),
+ % Upgrade from pre-BigCouch disk format
+ ActiveSize = couch_key_tree:fold(fun
+ (_RevId, {_Del, _Ptr, _Seq}, _, Acc) ->
+ Acc;
+ (_RevId, {_Del, _Ptr, _Seq, Size}, _, Acc) ->
+ Acc + Size;
+ (_RevId, {_Del, _Ptr, _Seq, Sizes, _Atts}, _, Acc) ->
+ {Active, _} = Sizes,
+ Active + Acc;
+ (_RevId, ?REV_MISSING, _, Acc) ->
+ Acc
+ end, 0, DiskTree),
+ btree_by_id_join(Id, {HighSeq, Deleted, {ActiveSize, 0}, DiskTree});
+
+btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) ->
#full_doc_info{
id = Id,
update_seq = HighSeq,
deleted = ?i2b(Deleted),
- rev_tree = Tree,
- leafs_size = LeafsSize
+ sizes = Sizes,
+ rev_tree = rev_tree(DiskTree)
}.
btree_by_id_reduce(reduce, FullDocInfos) ->
- lists:foldl(
- fun(Info, {NotDeleted, Deleted, Size}) ->
- Size2 = sum_leaf_sizes(Size, Info#full_doc_info.leafs_size),
- case Info#full_doc_info.deleted of
- true ->
- {NotDeleted, Deleted + 1, Size2};
- false ->
- {NotDeleted + 1, Deleted, Size2}
- end
- end,
- {0, 0, 0}, FullDocInfos);
-btree_by_id_reduce(rereduce, Reds) ->
- lists:foldl(
- fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSize})
->
- % pre 1.2 format, will be upgraded on compaction
- {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil};
- ({NotDeleted, Deleted, Size}, {AccNotDeleted, AccDeleted,
AccSize}) ->
- AccSize2 = sum_leaf_sizes(AccSize, Size),
- {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSize2}
- end,
- {0, 0, 0}, Reds).
-
-sum_leaf_sizes(nil, _) ->
- nil;
-sum_leaf_sizes(_, nil) ->
- nil;
-sum_leaf_sizes(Size1, Size2) ->
- Size1 + Size2.
+ lists:foldl(fun
+ (#full_doc_info{deleted=false, sizes=Sizes}, {NotDel, Del, SAcc})
->
+ {NotDel + 1, Del, reduce_sizes(Sizes, SAcc)};
+ (#full_doc_info{deleted=true, sizes=Sizes}, {NotDel, Del, SAcc}) ->
+ {NotDel, Del + 1, reduce_sizes(Sizes, SAcc)}
+ end, {0, 0, {0, 0}}, FullDocInfos);
+btree_by_id_reduce(rereduce, Reductions) ->
+ lists:foldl(fun
+ ({NotDel, Del}, {NDAcc, DAcc, SAcc}) ->
+ {NotDel + NDAcc, Del + DAcc, SAcc};
+ ({NotDel, Del, Sizes}, {NDAcc, DAcc, SAcc}) ->
+ {NotDel + NDAcc, Del + DAcc, reduce_sizes(Sizes, SAcc)}
+ end, {0, 0, {0, 0}}, Reductions).
+
+reduce_sizes({A1, E1}, {A2, E2}) ->
+ {A1 + A2, E1 + E2};
+reduce_sizes(S, {_, _} = Acc) when is_integer(S) ->
+ reduce_sizes({S, 0}, Acc).
btree_by_seq_reduce(reduce, DocInfos) ->
% count the number of documents
@@ -549,10 +579,15 @@ flush_trees(_Db, [], AccFlushedTrees) ->
flush_trees(#db{fd = Fd} = Db,
[InfoUnflushed | RestUnflushed], AccFlushed) ->
#full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} =
InfoUnflushed,
- {Flushed, LeafsSize} = couch_key_tree:mapfold(
+ {Flushed, FinalAcc} = couch_key_tree:mapfold(
fun(_Rev, Value, Type, Acc) ->
case Value of
- #doc{deleted = IsDeleted, body = {summary, Summary, AttsFd}} ->
+ #doc{} = Doc ->
+ #doc{
+ deleted = IsDeleted,
+ body = {summary, Summary, AttsFd},
+ atts = Atts
+ } = Doc,
% this node value is actually an unwritten document
summary,
% write to disk.
% make sure the Fd in the written bins is the same Fd we
are
@@ -571,31 +606,44 @@ flush_trees(#db{fd = Fd} = Db,
" changed. Possibly retrying.", []),
throw(retry)
end,
- {ok, NewSummaryPointer, SummarySize} =
- couch_file:append_raw_chunk(Fd, Summary),
- TotalSize = lists:foldl(
- fun(#att{att_len = L}, A) -> A + L end,
- SummarySize, Value#doc.atts),
- NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
- seq=UpdateSeq, size=TotalSize},
- case Type of
- leaf ->
- {NewValue, Acc + TotalSize};
- branch ->
- {NewValue, Acc}
- end;
- {_, _, _, LeafSize} when Type =:= leaf, LeafSize =/= nil ->
- {Value, Acc + LeafSize};
- _ ->
+ AttsInfo = lists:usort([
+ {P, L} || #att{data = {_, P}, att_len = L} <- Atts
+ ]),
+ [_, _, SummaryBin] = Summary,
+ ExternalSize =
couch_compress:uncompressed_length(SummaryBin),
+ {ok, NewPtr, ActiveSize}
+ = couch_file:append_raw_chunk(Fd, Summary),
+ Leaf = #leaf{
+ deleted = IsDeleted,
+ ptr = NewPtr,
+ seq = UpdateSeq,
+ sizes = {ActiveSize, ExternalSize},
+ atts = AttsInfo
+ },
+ {Leaf, add_sizes(Type, Leaf, Acc)};
+ #leaf{} = Leaf ->
+ {Value, add_sizes(Type, Leaf, Acc)};
+ ?REV_MISSING ->
{Value, Acc}
end
- end, 0, Unflushed),
+ end, {0, 0, []}, Unflushed),
+ {FinalAS, FinalES, FinalAtts} = FinalAcc,
+ TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
InfoFlushed = InfoUnflushed#full_doc_info{
rev_tree = Flushed,
- leafs_size = LeafsSize
+ sizes = {FinalAS + TotalAttSize, FinalES + TotalAttSize}
},
flush_trees(Db, RestUnflushed, [InfoFlushed | AccFlushed]).
+add_sizes(branch, _, Acc) ->
+ Acc;
+add_sizes(leaf, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
+ {ActiveSize, ExternalSize} = upgrade_sizes(Sizes),
+ {ASAcc, ESAcc, AttsAcc} = Acc,
+ NewASAcc = ActiveSize + ASAcc,
+ NewESAcc = ExternalSize + ESAcc,
+ NewAttsAcc = lists:umerge(AttSizes, AttsAcc),
+ {NewASAcc, NewESAcc, NewAttsAcc}.
send_result(Client, Ref, NewResult) ->
% used to send a result to the client
@@ -896,23 +944,34 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos,
Retry) ->
A =< B
end, merge_lookups(MixedInfos, LookupResults)),
- NewInfos1 = lists:map(
- fun(#full_doc_info{rev_tree=RevTree}=Info) ->
- Info#full_doc_info{rev_tree=couch_key_tree:map(
- fun(_, _, branch) ->
- ?REV_MISSING;
- (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
- {_Body, AttsInfo} = Summary = copy_doc_attachments(
- Db, Sp, DestFd),
- SummaryChunk = make_doc_summary(NewDb, Summary),
- {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
- DestFd, SummaryChunk),
- TotalLeafSize = lists:foldl(
- fun({_, _, _, AttLen, _, _, _, _}, S) -> S +
AttLen end,
- SummarySize, AttsInfo),
- Leaf#leaf{ptr=Pos, size=TotalLeafSize}
- end, RevTree)}
- end, NewInfos0),
+ NewInfos1 = lists:map(fun(Info) ->
+ {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
+ (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
+ {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
+ Summary = make_doc_summary(NewDb, {Body, AttInfos}),
+ [_, _, SummaryBin] = Summary,
+ ExternalSize =
couch_compress:uncompressed_length(SummaryBin),
+ {ok, Pos, ActiveSize}
+ = couch_file:append_raw_chunk(DestFd, Summary),
+ AttSizes = [{element(3, A), element(4, A)} || A <-
AttInfos],
+ NewLeaf = Leaf#leaf{
+ ptr = Pos,
+ sizes = {ActiveSize, ExternalSize},
+ atts = lists:usort(AttSizes)
+ },
+ {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)};
+ (_Rev, _Value, branch, SizesAcc) ->
+ {?REV_MISSING, SizesAcc}
+ end, {0, 0, []}, Info#full_doc_info.rev_tree),
+ {FinalAS, FinalES, FinalAtts} = FinalAcc,
+ TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0,
FinalAtts),
+ NewActiveSize = FinalAS + TotalAttSize,
+ NewExternalSize = FinalES + TotalAttSize,
+ Info#full_doc_info{
+ rev_tree = NewRevTree,
+ sizes = {NewActiveSize, NewExternalSize}
+ }
+ end, NewInfos0),
NewInfos = stem_full_doc_infos(Db, NewInfos1),
RemoveSeqs =
--
Andy Wenk
Hamburg - Germany
RockIt!
http://www.couchdb-buch.de
http://www.pg-praxisbuch.de
GPG fingerprint: C044 8322 9E12 1483 4FEC 9452 B65D 6BE3 9ED3 9588
https://people.apache.org/keys/committer/andywenk.asc
Re: couch commit: updated refs/heads/2001-feature-external-size to d9d11a9
Posted by Andy Wenk <an...@nms.de>.
awesome! Thanks Bob
On 11 February 2014 10:51, Robert Samuel Newson <rn...@apache.org> wrote:
> Clear commit messages are something we’ve committed to in the past and
> will be a required practice once we get the merges nailed down. The review
> process will include ensuring that commits follow the standard we’ve agreed
> to.
>
> B.
>
> On 11 Feb 2014, at 08:13, Andy Wenk <an...@nms.de> wrote:
>
> > This is not a rant but a selfish wish :)
> >
> > I wish all commit messages would be this explanatory like the one below.
> As
> > I am not really deep into the erlang code but would love to understand
> > more, a good way for me is to read all the commit messages. But mostly we
> > have oneliner. Reading the code is ok but with a explaining message, it
> is
> > clearer.
> >
> > Maybe you remember this when you write your next message to help me and
> > others :)
> >
> > Thanks a lot
> >
> > Cheers
> >
> > Andy
> >
> > ---------- Forwarded message ----------
> > From: <da...@apache.org>
> > Date: 10 February 2014 22:54
> > Subject: couch commit: updated refs/heads/2001-feature-external-size to
> > d9d11a9
> > To: commits@couchdb.apache.org
> >
> >
> > Updated Branches:
> > refs/heads/2001-feature-external-size [created] d9d11a943
> >
> >
> > Implement database external size calculations
> >
> > This patch adds calculations to show the "external size" of a database
> > which is roughly a measure of how much disk space it would take to store
> > the contents of the database in flat files. It is used to calculate
> > rough compression ratios for capacity planning.
> >
> > COUCHDB-2001
> >
> >
> > Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
> > Commit:
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d9d11a94
> > Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d9d11a94
> > Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d9d11a94
> >
> > Branch: refs/heads/2001-feature-external-size
> > Commit: d9d11a943e2815897ee5b5b3d1b1eef4b417c1fa
> > Parents: 09c6556
> > Author: Paul J. Davis <pa...@gmail.com>
> > Authored: Mon Feb 10 14:03:47 2014 -0600
> > Committer: Paul J. Davis <pa...@gmail.com>
> > Committed: Mon Feb 10 15:32:21 2014 -0600
> >
> > ----------------------------------------------------------------------
> > include/couch_db.hrl | 5 +-
> > src/couch_btree.erl | 10 +-
> > src/couch_compress.erl | 10 ++
> > src/couch_db.erl | 56 +++++-----
> > src/couch_db_updater.erl | 237 ++++++++++++++++++++++++++----------------
> > 5 files changed, 196 insertions(+), 122 deletions(-)
> > ----------------------------------------------------------------------
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/include/couch_db.hrl
> > ----------------------------------------------------------------------
> > diff --git a/include/couch_db.hrl b/include/couch_db.hrl
> > index 2c015df..2ce5ebe 100644
> > --- a/include/couch_db.hrl
> > +++ b/include/couch_db.hrl
> > @@ -65,7 +65,7 @@
> > update_seq = 0,
> > deleted = false,
> > rev_tree = [],
> > - leafs_size = 0
> > + sizes = {0, 0}
> > }).
> >
> > -record(httpd,
> > @@ -251,6 +251,7 @@
> > deleted,
> > ptr,
> > seq,
> > - size = nil
> > + sizes = {0, 0},
> > + atts = []
> > }).
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_btree.erl
> > ----------------------------------------------------------------------
> > diff --git a/src/couch_btree.erl b/src/couch_btree.erl
> > index 9caceb8..ac5681d 100644
> > --- a/src/couch_btree.erl
> > +++ b/src/couch_btree.erl
> > @@ -61,8 +61,8 @@ final_reduce(#btree{reduce=Reduce}, Val) ->
> > final_reduce(Reduce, Val);
> > final_reduce(Reduce, {[], []}) ->
> > Reduce(reduce, []);
> > -final_reduce(_Bt, {[], [Red]}) ->
> > - Red;
> > +final_reduce(Reduce, {[], [Red]}) ->
> > + Reduce(rereduce, [Red]);
> > final_reduce(Reduce, {[], Reductions}) ->
> > Reduce(rereduce, Reductions);
> > final_reduce(Reduce, {KVs, Reductions}) ->
> > @@ -92,14 +92,14 @@ fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options)
> ->
> >
> > full_reduce(#btree{root=nil,reduce=Reduce}) ->
> > {ok, Reduce(reduce, [])};
> > -full_reduce(#btree{root=Root}) ->
> > - {ok, element(2, Root)}.
> > +full_reduce(#btree{root=Root, reduce=Reduce}) ->
> > + {ok, Reduce(rereduce, [element(2, Root)])}.
> >
> > size(#btree{root = nil}) ->
> > 0;
> > size(#btree{root = {_P, _Red}}) ->
> > % pre 1.2 format
> > - nil;
> > + undefined;
> > size(#btree{root = {_P, _Red, Size}}) ->
> > Size.
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_compress.erl
> > ----------------------------------------------------------------------
> > diff --git a/src/couch_compress.erl b/src/couch_compress.erl
> > index 6b47a7a..49876d8 100644
> > --- a/src/couch_compress.erl
> > +++ b/src/couch_compress.erl
> > @@ -14,6 +14,7 @@
> >
> > -export([compress/2, decompress/1, is_compressed/2]).
> > -export([get_compression_method/0]).
> > +-export([uncompressed_length/1]).
> >
> > -include_lib("couch/include/couch_db.hrl").
> >
> > @@ -82,3 +83,12 @@ is_compressed(<<?TERM_PREFIX, _/binary>>, Method) ->
> > is_compressed(Term, _Method) when not is_binary(Term) ->
> > false.
> >
> > +
> > +uncompressed_length(<<?SNAPPY_PREFIX, _/binary>> = Bin) ->
> > + snappy:uncompressed_length(Bin);
> > +uncompressed_length(<<?COMPRESSED_TERM_PREFIX, _/binary>> = Bin) ->
> > + <<131, 80, Size:4/big-unsigned-integer-unit:8, _/binary>> = Bin,
> > + Size;
> > +uncompressed_length(<<?TERM_PREFIX, _/binary>> = Bin) ->
> > + size(Bin).
> > +
> >
> >
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db.erl
> > ----------------------------------------------------------------------
> > diff --git a/src/couch_db.erl b/src/couch_db.erl
> > index 32a0049..6432e8e 100644
> > --- a/src/couch_db.erl
> > +++ b/src/couch_db.erl
> > @@ -302,44 +302,48 @@ get_db_info(Db) ->
> > name=Name,
> > instance_start_time=StartTime,
> > committed_update_seq=CommittedUpdateSeq,
> > - id_tree = IdBtree,
> > - seq_tree = SeqBtree,
> > - local_tree = LocalBtree
> > + id_tree = IdBtree
> > } = Db,
> > - {ok, Size} = couch_file:bytes(Fd),
> > - {ok, DbReduction} = couch_btree:full_reduce(IdBtree),
> > + {ok, FileSize} = couch_file:bytes(Fd),
> > + {ok, {Count, DelCount, Sizes}} = couch_btree:full_reduce(IdBtree),
> > + {ActiveSize0, ExternalSize} = Sizes,
> > + ActiveSize = active_size(Db, ActiveSize0),
> > InfoList = [
> > {db_name, Name},
> > - {doc_count, element(1, DbReduction)},
> > - {doc_del_count, element(2, DbReduction)},
> > + {doc_count, Count},
> > + {doc_del_count, DelCount},
> > {update_seq, SeqNum},
> > {purge_seq, couch_db:get_purge_seq(Db)},
> > {compact_running, Compactor/=nil},
> > - {disk_size, Size},
> > - {data_size, db_data_size(DbReduction, [SeqBtree, IdBtree,
> > LocalBtree])},
> > + {disk_size, FileSize},
> > + {data_size, ActiveSize},
> > + {sizes, {[
> > + {file, FileSize},
> > + {active, ActiveSize},
> > + {external, ExternalSize}
> > + ]}},
> > {instance_start_time, StartTime},
> > {disk_format_version, DiskVersion},
> > {committed_update_seq, CommittedUpdateSeq}
> > ],
> > {ok, InfoList}.
> >
> > -db_data_size({_Count, _DelCount}, _Trees) ->
> > - % pre 1.2 format, upgraded on compaction
> > - null;
> > -db_data_size({_Count, _DelCount, nil}, _Trees) ->
> > - null;
> > -db_data_size({_Count, _DelCount, DocAndAttsSize}, Trees) ->
> > - sum_tree_sizes(DocAndAttsSize, Trees).
> > -
> > -sum_tree_sizes(Acc, []) ->
> > - Acc;
> > -sum_tree_sizes(Acc, [T | Rest]) ->
> > - case couch_btree:size(T) of
> > - nil ->
> > - null;
> > - Sz ->
> > - sum_tree_sizes(Acc + Sz, Rest)
> > - end.
> > +active_size(#db{}=Db, DocActiveSize) ->
> > + Trees = [
> > + Db#db.id_tree,
> > + Db#db.seq_tree,
> > + Db#db.local_tree
> > + ],
> > + lists:foldl(fun(T, Acc) ->
> > + case couch_btree:size(T) of
> > + _ when Acc == null ->
> > + null;
> > + undefined ->
> > + null;
> > + Size ->
> > + Acc + Size
> > + end
> > + end, DocActiveSize, Trees).
> >
> > get_design_docs(#db{name = <<"shards/", _:18/binary, DbName/binary>>}) ->
> > {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName))
> > end),
> >
> >
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db_updater.erl
> > ----------------------------------------------------------------------
> > diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
> > index 649826a..eb75177 100644
> > --- a/src/couch_db_updater.erl
> > +++ b/src/couch_db_updater.erl
> > @@ -342,40 +342,60 @@ collect_updates(GroupedDocsAcc, ClientsAcc,
> > MergeConflicts, FullCommit) ->
> > end.
> >
> > rev_tree(DiskTree) ->
> > - couch_key_tree:mapfold(fun
> > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
> > + couch_key_tree:map(fun
> > + (_RevId, {Del, Ptr, Seq}) ->
> > % pre 1.2 format, will be upgraded on compaction
> > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> > seq=UpdateSeq}, nil};
> > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
> > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> > seq=UpdateSeq}, Acc};
> > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc)
> ->
> > - Acc2 = sum_leaf_sizes(Acc, Size),
> > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> > seq=UpdateSeq, size=Size}, Acc2};
> > - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch,
> Acc) ->
> > - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> > seq=UpdateSeq, size=Size}, Acc};
> > - (_RevId, ?REV_MISSING, _Type, Acc) ->
> > - {?REV_MISSING, Acc}
> > - end, 0, DiskTree).
> > + #leaf{deleted=?i2b(Del), ptr=Ptr, seq=Seq};
> > + (_RevId, {Del, Ptr, Seq, Size}) ->
> > + % Pre-bigcouch format, will be upgraded on compaction
> > + #leaf{
> > + deleted = ?i2b(Del),
> > + ptr = Ptr,
> > + seq = Seq,
> > + sizes = {Size, 0},
> > + atts = []
> > + };
> > + (_RevId, {Del, Ptr, Seq, Sizes, Atts}) ->
> > + #leaf{
> > + deleted = ?i2b(Del),
> > + ptr = Ptr,
> > + seq = Seq,
> > + sizes = Sizes,
> > + atts = Atts
> > + };
> > + (_RevId, ?REV_MISSING) ->
> > + ?REV_MISSING
> > + end, DiskTree).
> >
> > disk_tree(RevTree) ->
> > couch_key_tree:map(fun
> > (_RevId, ?REV_MISSING) ->
> > ?REV_MISSING;
> > - (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer,
> seq=UpdateSeq,
> > size=Size}) ->
> > - {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
> > + (_RevId, #leaf{}=Leaf) ->
> > + #leaf{
> > + deleted = Del,
> > + ptr = Ptr,
> > + seq = Seq,
> > + sizes = Sizes,
> > + atts = Atts
> > + } = Leaf,
> > + {?b2i(Del), Ptr, Seq, upgrade_sizes(Sizes), Atts}
> > end, RevTree).
> >
> > +upgrade_sizes({_, _} = Sizes) ->
> > + Sizes;
> > +upgrade_sizes(S) when is_integer(S) ->
> > + {S, 0}.
> > +
> > btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del,
> > rev_tree=T}) ->
> > {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
> >
> > btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
> > - {RevTree, LeafsSize} = rev_tree(DiskTree),
> > #full_doc_info{
> > id = Id,
> > update_seq = Seq,
> > deleted = ?i2b(Del),
> > - rev_tree = RevTree,
> > - leafs_size = LeafsSize
> > + rev_tree = rev_tree(DiskTree)
> > };
> > btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
> > % Older versions stored #doc_info records in the seq_tree.
> > @@ -389,49 +409,59 @@ btree_by_seq_join(KeySeq, {Id, RevInfos,
> > DeletedRevInfos}) ->
> > [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
> > {Rev, Seq, Bp} <- DeletedRevInfos]}.
> >
> > -btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
> > - deleted=Deleted, rev_tree=Tree}) ->
> > - {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
> > +btree_by_id_split(#full_doc_info{}=Info) ->
> > + #full_doc_info{
> > + id = Id,
> > + update_seq = Seq,
> > + deleted = Del,
> > + sizes = Sizes,
> > + rev_tree = Tree
> > + } = Info,
> > + {Id, {Seq, ?b2i(Del), upgrade_sizes(Sizes), disk_tree(Tree)}}.
> >
> > btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
> > - {Tree, LeafsSize} = rev_tree(DiskTree),
> > + % Upgrade from pre-BigCouch disk format
> > + ActiveSize = couch_key_tree:fold(fun
> > + (_RevId, {_Del, _Ptr, _Seq}, _, Acc) ->
> > + Acc;
> > + (_RevId, {_Del, _Ptr, _Seq, Size}, _, Acc) ->
> > + Acc + Size;
> > + (_RevId, {_Del, _Ptr, _Seq, Sizes, _Atts}, _, Acc) ->
> > + {Active, _} = Sizes,
> > + Active + Acc;
> > + (_RevId, ?REV_MISSING, _, Acc) ->
> > + Acc
> > + end, 0, DiskTree),
> > + btree_by_id_join(Id, {HighSeq, Deleted, {ActiveSize, 0}, DiskTree});
> > +
> > +btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) ->
> > #full_doc_info{
> > id = Id,
> > update_seq = HighSeq,
> > deleted = ?i2b(Deleted),
> > - rev_tree = Tree,
> > - leafs_size = LeafsSize
> > + sizes = Sizes,
> > + rev_tree = rev_tree(DiskTree)
> > }.
> >
> > btree_by_id_reduce(reduce, FullDocInfos) ->
> > - lists:foldl(
> > - fun(Info, {NotDeleted, Deleted, Size}) ->
> > - Size2 = sum_leaf_sizes(Size, Info#full_doc_info.leafs_size),
> > - case Info#full_doc_info.deleted of
> > - true ->
> > - {NotDeleted, Deleted + 1, Size2};
> > - false ->
> > - {NotDeleted + 1, Deleted, Size2}
> > - end
> > - end,
> > - {0, 0, 0}, FullDocInfos);
> > -btree_by_id_reduce(rereduce, Reds) ->
> > - lists:foldl(
> > - fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted,
> _AccSize})
> > ->
> > - % pre 1.2 format, will be upgraded on compaction
> > - {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil};
> > - ({NotDeleted, Deleted, Size}, {AccNotDeleted, AccDeleted,
> > AccSize}) ->
> > - AccSize2 = sum_leaf_sizes(AccSize, Size),
> > - {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSize2}
> > - end,
> > - {0, 0, 0}, Reds).
> > -
> > -sum_leaf_sizes(nil, _) ->
> > - nil;
> > -sum_leaf_sizes(_, nil) ->
> > - nil;
> > -sum_leaf_sizes(Size1, Size2) ->
> > - Size1 + Size2.
> > + lists:foldl(fun
> > + (#full_doc_info{deleted=false, sizes=Sizes}, {NotDel, Del,
> SAcc})
> > ->
> > + {NotDel + 1, Del, reduce_sizes(Sizes, SAcc)};
> > + (#full_doc_info{deleted=true, sizes=Sizes}, {NotDel, Del,
> SAcc}) ->
> > + {NotDel, Del + 1, reduce_sizes(Sizes, SAcc)}
> > + end, {0, 0, {0, 0}}, FullDocInfos);
> > +btree_by_id_reduce(rereduce, Reductions) ->
> > + lists:foldl(fun
> > + ({NotDel, Del}, {NDAcc, DAcc, SAcc}) ->
> > + {NotDel + NDAcc, Del + DAcc, SAcc};
> > + ({NotDel, Del, Sizes}, {NDAcc, DAcc, SAcc}) ->
> > + {NotDel + NDAcc, Del + DAcc, reduce_sizes(Sizes, SAcc)}
> > + end, {0, 0, {0, 0}}, Reductions).
> > +
> > +reduce_sizes({A1, E1}, {A2, E2}) ->
> > + {A1 + A2, E1 + E2};
> > +reduce_sizes(S, {_, _} = Acc) when is_integer(S) ->
> > + reduce_sizes({S, 0}, Acc).
> >
> > btree_by_seq_reduce(reduce, DocInfos) ->
> > % count the number of documents
> > @@ -549,10 +579,15 @@ flush_trees(_Db, [], AccFlushedTrees) ->
> > flush_trees(#db{fd = Fd} = Db,
> > [InfoUnflushed | RestUnflushed], AccFlushed) ->
> > #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} =
> > InfoUnflushed,
> > - {Flushed, LeafsSize} = couch_key_tree:mapfold(
> > + {Flushed, FinalAcc} = couch_key_tree:mapfold(
> > fun(_Rev, Value, Type, Acc) ->
> > case Value of
> > - #doc{deleted = IsDeleted, body = {summary, Summary,
> AttsFd}} ->
> > + #doc{} = Doc ->
> > + #doc{
> > + deleted = IsDeleted,
> > + body = {summary, Summary, AttsFd},
> > + atts = Atts
> > + } = Doc,
> > % this node value is actually an unwritten document
> > summary,
> > % write to disk.
> > % make sure the Fd in the written bins is the same Fd we
> > are
> > @@ -571,31 +606,44 @@ flush_trees(#db{fd = Fd} = Db,
> > " changed. Possibly retrying.", []),
> > throw(retry)
> > end,
> > - {ok, NewSummaryPointer, SummarySize} =
> > - couch_file:append_raw_chunk(Fd, Summary),
> > - TotalSize = lists:foldl(
> > - fun(#att{att_len = L}, A) -> A + L end,
> > - SummarySize, Value#doc.atts),
> > - NewValue = #leaf{deleted=IsDeleted,
> ptr=NewSummaryPointer,
> > - seq=UpdateSeq, size=TotalSize},
> > - case Type of
> > - leaf ->
> > - {NewValue, Acc + TotalSize};
> > - branch ->
> > - {NewValue, Acc}
> > - end;
> > - {_, _, _, LeafSize} when Type =:= leaf, LeafSize =/= nil ->
> > - {Value, Acc + LeafSize};
> > - _ ->
> > + AttsInfo = lists:usort([
> > + {P, L} || #att{data = {_, P}, att_len = L} <-
> Atts
> > + ]),
> > + [_, _, SummaryBin] = Summary,
> > + ExternalSize =
> > couch_compress:uncompressed_length(SummaryBin),
> > + {ok, NewPtr, ActiveSize}
> > + = couch_file:append_raw_chunk(Fd, Summary),
> > + Leaf = #leaf{
> > + deleted = IsDeleted,
> > + ptr = NewPtr,
> > + seq = UpdateSeq,
> > + sizes = {ActiveSize, ExternalSize},
> > + atts = AttsInfo
> > + },
> > + {Leaf, add_sizes(Type, Leaf, Acc)};
> > + #leaf{} = Leaf ->
> > + {Value, add_sizes(Type, Leaf, Acc)};
> > + ?REV_MISSING ->
> > {Value, Acc}
> > end
> > - end, 0, Unflushed),
> > + end, {0, 0, []}, Unflushed),
> > + {FinalAS, FinalES, FinalAtts} = FinalAcc,
> > + TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0,
> FinalAtts),
> > InfoFlushed = InfoUnflushed#full_doc_info{
> > rev_tree = Flushed,
> > - leafs_size = LeafsSize
> > + sizes = {FinalAS + TotalAttSize, FinalES + TotalAttSize}
> > },
> > flush_trees(Db, RestUnflushed, [InfoFlushed | AccFlushed]).
> >
> > +add_sizes(branch, _, Acc) ->
> > + Acc;
> > +add_sizes(leaf, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
> > + {ActiveSize, ExternalSize} = upgrade_sizes(Sizes),
> > + {ASAcc, ESAcc, AttsAcc} = Acc,
> > + NewASAcc = ActiveSize + ASAcc,
> > + NewESAcc = ExternalSize + ESAcc,
> > + NewAttsAcc = lists:umerge(AttSizes, AttsAcc),
> > + {NewASAcc, NewESAcc, NewAttsAcc}.
> >
> > send_result(Client, Ref, NewResult) ->
> > % used to send a result to the client
> > @@ -896,23 +944,34 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos,
> > Retry) ->
> > A =< B
> > end, merge_lookups(MixedInfos, LookupResults)),
> >
> > - NewInfos1 = lists:map(
> > - fun(#full_doc_info{rev_tree=RevTree}=Info) ->
> > - Info#full_doc_info{rev_tree=couch_key_tree:map(
> > - fun(_, _, branch) ->
> > - ?REV_MISSING;
> > - (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
> > - {_Body, AttsInfo} = Summary = copy_doc_attachments(
> > - Db, Sp, DestFd),
> > - SummaryChunk = make_doc_summary(NewDb, Summary),
> > - {ok, Pos, SummarySize} =
> couch_file:append_raw_chunk(
> > - DestFd, SummaryChunk),
> > - TotalLeafSize = lists:foldl(
> > - fun({_, _, _, AttLen, _, _, _, _}, S) -> S +
> > AttLen end,
> > - SummarySize, AttsInfo),
> > - Leaf#leaf{ptr=Pos, size=TotalLeafSize}
> > - end, RevTree)}
> > - end, NewInfos0),
> > + NewInfos1 = lists:map(fun(Info) ->
> > + {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
> > + (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
> > + {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
> > + Summary = make_doc_summary(NewDb, {Body, AttInfos}),
> > + [_, _, SummaryBin] = Summary,
> > + ExternalSize =
> > couch_compress:uncompressed_length(SummaryBin),
> > + {ok, Pos, ActiveSize}
> > + = couch_file:append_raw_chunk(DestFd, Summary),
> > + AttSizes = [{element(3, A), element(4, A)} || A <-
> > AttInfos],
> > + NewLeaf = Leaf#leaf{
> > + ptr = Pos,
> > + sizes = {ActiveSize, ExternalSize},
> > + atts = lists:usort(AttSizes)
> > + },
> > + {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)};
> > + (_Rev, _Value, branch, SizesAcc) ->
> > + {?REV_MISSING, SizesAcc}
> > + end, {0, 0, []}, Info#full_doc_info.rev_tree),
> > + {FinalAS, FinalES, FinalAtts} = FinalAcc,
> > + TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0,
> > FinalAtts),
> > + NewActiveSize = FinalAS + TotalAttSize,
> > + NewExternalSize = FinalES + TotalAttSize,
> > + Info#full_doc_info{
> > + rev_tree = NewRevTree,
> > + sizes = {NewActiveSize, NewExternalSize}
> > + }
> > + end, NewInfos0),
> >
> > NewInfos = stem_full_doc_infos(Db, NewInfos1),
> > RemoveSeqs =
> >
> >
> >
> >
> > --
> > Andy Wenk
> > Hamburg - Germany
> > RockIt!
> >
> > http://www.couchdb-buch.de
> > http://www.pg-praxisbuch.de
> >
> > GPG fingerprint: C044 8322 9E12 1483 4FEC 9452 B65D 6BE3 9ED3 9588
> >
> > https://people.apache.org/keys/committer/andywenk.asc
>
>
--
Andy Wenk
Hamburg - Germany
RockIt!
http://www.couchdb-buch.de
http://www.pg-praxisbuch.de
GPG fingerprint: C044 8322 9E12 1483 4FEC 9452 B65D 6BE3 9ED3 9588
https://people.apache.org/keys/committer/andywenk.asc
Re: couch commit: updated refs/heads/2001-feature-external-size to d9d11a9
Posted by Robert Samuel Newson <rn...@apache.org>.
Clear commit messages are something we’ve committed to in the past, and they will be a required practice once we get the merges nailed down. The review process will include ensuring that commits follow the standard we’ve agreed to.
B.
On 11 Feb 2014, at 08:13, Andy Wenk <an...@nms.de> wrote:
> This is not a rant but a selfish wish :)
>
> I wish all commit messages were as explanatory as the one below. As
> I am not really deep into the Erlang code but would love to understand
> more, a good way for me is to read all the commit messages. But mostly we
> have one-liners. Reading the code is OK, but with an explanatory message it
> is clearer.
>
> Maybe you could remember this when you write your next commit message, to
> help me and others :)
>
> Thanks a lot
>
> Cheers
>
> Andy
>
> ---------- Forwarded message ----------
> From: <da...@apache.org>
> Date: 10 February 2014 22:54
> Subject: couch commit: updated refs/heads/2001-feature-external-size to
> d9d11a9
> To: commits@couchdb.apache.org
>
>
> Updated Branches:
> refs/heads/2001-feature-external-size [created] d9d11a943
>
>
> Implement database external size calculations
>
> This patch adds calculations to show the "external size" of a database
> which is roughly a measure of how much disk space it would take to store
> the contents of the database in flat files. It is used to calculate
> rough compression ratios for capacity planning.
>
> COUCHDB-2001
>
>
> Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
> Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/d9d11a94
> Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/d9d11a94
> Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/d9d11a94
>
> Branch: refs/heads/2001-feature-external-size
> Commit: d9d11a943e2815897ee5b5b3d1b1eef4b417c1fa
> Parents: 09c6556
> Author: Paul J. Davis <pa...@gmail.com>
> Authored: Mon Feb 10 14:03:47 2014 -0600
> Committer: Paul J. Davis <pa...@gmail.com>
> Committed: Mon Feb 10 15:32:21 2014 -0600
>
> ----------------------------------------------------------------------
> include/couch_db.hrl | 5 +-
> src/couch_btree.erl | 10 +-
> src/couch_compress.erl | 10 ++
> src/couch_db.erl | 56 +++++-----
> src/couch_db_updater.erl | 237 ++++++++++++++++++++++++++----------------
> 5 files changed, 196 insertions(+), 122 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/include/couch_db.hrl
> ----------------------------------------------------------------------
> diff --git a/include/couch_db.hrl b/include/couch_db.hrl
> index 2c015df..2ce5ebe 100644
> --- a/include/couch_db.hrl
> +++ b/include/couch_db.hrl
> @@ -65,7 +65,7 @@
> update_seq = 0,
> deleted = false,
> rev_tree = [],
> - leafs_size = 0
> + sizes = {0, 0}
> }).
>
> -record(httpd,
> @@ -251,6 +251,7 @@
> deleted,
> ptr,
> seq,
> - size = nil
> + sizes = {0, 0},
> + atts = []
> }).
>
>
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_btree.erl
> ----------------------------------------------------------------------
> diff --git a/src/couch_btree.erl b/src/couch_btree.erl
> index 9caceb8..ac5681d 100644
> --- a/src/couch_btree.erl
> +++ b/src/couch_btree.erl
> @@ -61,8 +61,8 @@ final_reduce(#btree{reduce=Reduce}, Val) ->
> final_reduce(Reduce, Val);
> final_reduce(Reduce, {[], []}) ->
> Reduce(reduce, []);
> -final_reduce(_Bt, {[], [Red]}) ->
> - Red;
> +final_reduce(Reduce, {[], [Red]}) ->
> + Reduce(rereduce, [Red]);
> final_reduce(Reduce, {[], Reductions}) ->
> Reduce(rereduce, Reductions);
> final_reduce(Reduce, {KVs, Reductions}) ->
> @@ -92,14 +92,14 @@ fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) ->
>
> full_reduce(#btree{root=nil,reduce=Reduce}) ->
> {ok, Reduce(reduce, [])};
> -full_reduce(#btree{root=Root}) ->
> - {ok, element(2, Root)}.
> +full_reduce(#btree{root=Root, reduce=Reduce}) ->
> + {ok, Reduce(rereduce, [element(2, Root)])}.
>
> size(#btree{root = nil}) ->
> 0;
> size(#btree{root = {_P, _Red}}) ->
> % pre 1.2 format
> - nil;
> + undefined;
> size(#btree{root = {_P, _Red, Size}}) ->
> Size.
>
>
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_compress.erl
> ----------------------------------------------------------------------
> diff --git a/src/couch_compress.erl b/src/couch_compress.erl
> index 6b47a7a..49876d8 100644
> --- a/src/couch_compress.erl
> +++ b/src/couch_compress.erl
> @@ -14,6 +14,7 @@
>
> -export([compress/2, decompress/1, is_compressed/2]).
> -export([get_compression_method/0]).
> +-export([uncompressed_length/1]).
>
> -include_lib("couch/include/couch_db.hrl").
>
> @@ -82,3 +83,12 @@ is_compressed(<<?TERM_PREFIX, _/binary>>, Method) ->
> is_compressed(Term, _Method) when not is_binary(Term) ->
> false.
>
> +
> +uncompressed_length(<<?SNAPPY_PREFIX, _/binary>> = Bin) ->
> + snappy:uncompressed_length(Bin);
> +uncompressed_length(<<?COMPRESSED_TERM_PREFIX, _/binary>> = Bin) ->
> + <<131, 80, Size:4/big-unsigned-integer-unit:8, _/binary>> = Bin,
> + Size;
> +uncompressed_length(<<?TERM_PREFIX, _/binary>> = Bin) ->
> + size(Bin).
> +
>
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db.erl
> ----------------------------------------------------------------------
> diff --git a/src/couch_db.erl b/src/couch_db.erl
> index 32a0049..6432e8e 100644
> --- a/src/couch_db.erl
> +++ b/src/couch_db.erl
> @@ -302,44 +302,48 @@ get_db_info(Db) ->
> name=Name,
> instance_start_time=StartTime,
> committed_update_seq=CommittedUpdateSeq,
> - id_tree = IdBtree,
> - seq_tree = SeqBtree,
> - local_tree = LocalBtree
> + id_tree = IdBtree
> } = Db,
> - {ok, Size} = couch_file:bytes(Fd),
> - {ok, DbReduction} = couch_btree:full_reduce(IdBtree),
> + {ok, FileSize} = couch_file:bytes(Fd),
> + {ok, {Count, DelCount, Sizes}} = couch_btree:full_reduce(IdBtree),
> + {ActiveSize0, ExternalSize} = Sizes,
> + ActiveSize = active_size(Db, ActiveSize0),
> InfoList = [
> {db_name, Name},
> - {doc_count, element(1, DbReduction)},
> - {doc_del_count, element(2, DbReduction)},
> + {doc_count, Count},
> + {doc_del_count, DelCount},
> {update_seq, SeqNum},
> {purge_seq, couch_db:get_purge_seq(Db)},
> {compact_running, Compactor/=nil},
> - {disk_size, Size},
> - {data_size, db_data_size(DbReduction, [SeqBtree, IdBtree,
> LocalBtree])},
> + {disk_size, FileSize},
> + {data_size, ActiveSize},
> + {sizes, {[
> + {file, FileSize},
> + {active, ActiveSize},
> + {external, ExternalSize}
> + ]}},
> {instance_start_time, StartTime},
> {disk_format_version, DiskVersion},
> {committed_update_seq, CommittedUpdateSeq}
> ],
> {ok, InfoList}.
>
> -db_data_size({_Count, _DelCount}, _Trees) ->
> - % pre 1.2 format, upgraded on compaction
> - null;
> -db_data_size({_Count, _DelCount, nil}, _Trees) ->
> - null;
> -db_data_size({_Count, _DelCount, DocAndAttsSize}, Trees) ->
> - sum_tree_sizes(DocAndAttsSize, Trees).
> -
> -sum_tree_sizes(Acc, []) ->
> - Acc;
> -sum_tree_sizes(Acc, [T | Rest]) ->
> - case couch_btree:size(T) of
> - nil ->
> - null;
> - Sz ->
> - sum_tree_sizes(Acc + Sz, Rest)
> - end.
> +active_size(#db{}=Db, DocActiveSize) ->
> + Trees = [
> + Db#db.id_tree,
> + Db#db.seq_tree,
> + Db#db.local_tree
> + ],
> + lists:foldl(fun(T, Acc) ->
> + case couch_btree:size(T) of
> + _ when Acc == null ->
> + null;
> + undefined ->
> + null;
> + Size ->
> + Acc + Size
> + end
> + end, DocActiveSize, Trees).
>
> get_design_docs(#db{name = <<"shards/", _:18/binary, DbName/binary>>}) ->
> {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName))
> end),
>
> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/d9d11a94/src/couch_db_updater.erl
> ----------------------------------------------------------------------
> diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
> index 649826a..eb75177 100644
> --- a/src/couch_db_updater.erl
> +++ b/src/couch_db_updater.erl
> @@ -342,40 +342,60 @@ collect_updates(GroupedDocsAcc, ClientsAcc,
> MergeConflicts, FullCommit) ->
> end.
>
> rev_tree(DiskTree) ->
> - couch_key_tree:mapfold(fun
> - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
> + couch_key_tree:map(fun
> + (_RevId, {Del, Ptr, Seq}) ->
> % pre 1.2 format, will be upgraded on compaction
> - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> seq=UpdateSeq}, nil};
> - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
> - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> seq=UpdateSeq}, Acc};
> - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
> - Acc2 = sum_leaf_sizes(Acc, Size),
> - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> seq=UpdateSeq, size=Size}, Acc2};
> - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
> - {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer,
> seq=UpdateSeq, size=Size}, Acc};
> - (_RevId, ?REV_MISSING, _Type, Acc) ->
> - {?REV_MISSING, Acc}
> - end, 0, DiskTree).
> + #leaf{deleted=?i2b(Del), ptr=Ptr, seq=Seq};
> + (_RevId, {Del, Ptr, Seq, Size}) ->
> + % Pre-bigcouch format, will be upgraded on compaction
> + #leaf{
> + deleted = ?i2b(Del),
> + ptr = Ptr,
> + seq = Seq,
> + sizes = {Size, 0},
> + atts = []
> + };
> + (_RevId, {Del, Ptr, Seq, Sizes, Atts}) ->
> + #leaf{
> + deleted = ?i2b(Del),
> + ptr = Ptr,
> + seq = Seq,
> + sizes = Sizes,
> + atts = Atts
> + };
> + (_RevId, ?REV_MISSING) ->
> + ?REV_MISSING
> + end, DiskTree).
>
> disk_tree(RevTree) ->
> couch_key_tree:map(fun
> (_RevId, ?REV_MISSING) ->
> ?REV_MISSING;
> - (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq,
> size=Size}) ->
> - {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
> + (_RevId, #leaf{}=Leaf) ->
> + #leaf{
> + deleted = Del,
> + ptr = Ptr,
> + seq = Seq,
> + sizes = Sizes,
> + atts = Atts
> + } = Leaf,
> + {?b2i(Del), Ptr, Seq, upgrade_sizes(Sizes), Atts}
> end, RevTree).
>
> +upgrade_sizes({_, _} = Sizes) ->
> + Sizes;
> +upgrade_sizes(S) when is_integer(S) ->
> + {S, 0}.
> +
> btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del,
> rev_tree=T}) ->
> {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
>
> btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
> - {RevTree, LeafsSize} = rev_tree(DiskTree),
> #full_doc_info{
> id = Id,
> update_seq = Seq,
> deleted = ?i2b(Del),
> - rev_tree = RevTree,
> - leafs_size = LeafsSize
> + rev_tree = rev_tree(DiskTree)
> };
> btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
> % Older versions stored #doc_info records in the seq_tree.
> @@ -389,49 +409,59 @@ btree_by_seq_join(KeySeq, {Id, RevInfos,
> DeletedRevInfos}) ->
> [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
> {Rev, Seq, Bp} <- DeletedRevInfos]}.
>
> -btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
> - deleted=Deleted, rev_tree=Tree}) ->
> - {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
> +btree_by_id_split(#full_doc_info{}=Info) ->
> + #full_doc_info{
> + id = Id,
> + update_seq = Seq,
> + deleted = Del,
> + sizes = Sizes,
> + rev_tree = Tree
> + } = Info,
> + {Id, {Seq, ?b2i(Del), upgrade_sizes(Sizes), disk_tree(Tree)}}.
>
> btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
> - {Tree, LeafsSize} = rev_tree(DiskTree),
> + % Upgrade from pre-BigCouch disk format
> + ActiveSize = couch_key_tree:fold(fun
> + (_RevId, {_Del, _Ptr, _Seq}, _, Acc) ->
> + Acc;
> + (_RevId, {_Del, _Ptr, _Seq, Size}, _, Acc) ->
> + Acc + Size;
> + (_RevId, {_Del, _Ptr, _Seq, Sizes, _Atts}, _, Acc) ->
> + {Active, _} = Sizes,
> + Active + Acc;
> + (_RevId, ?REV_MISSING, _, Acc) ->
> + Acc
> + end, 0, DiskTree),
> + btree_by_id_join(Id, {HighSeq, Deleted, {ActiveSize, 0}, DiskTree});
> +
> +btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) ->
> #full_doc_info{
> id = Id,
> update_seq = HighSeq,
> deleted = ?i2b(Deleted),
> - rev_tree = Tree,
> - leafs_size = LeafsSize
> + sizes = Sizes,
> + rev_tree = rev_tree(DiskTree)
> }.
>
> btree_by_id_reduce(reduce, FullDocInfos) ->
> - lists:foldl(
> - fun(Info, {NotDeleted, Deleted, Size}) ->
> - Size2 = sum_leaf_sizes(Size, Info#full_doc_info.leafs_size),
> - case Info#full_doc_info.deleted of
> - true ->
> - {NotDeleted, Deleted + 1, Size2};
> - false ->
> - {NotDeleted + 1, Deleted, Size2}
> - end
> - end,
> - {0, 0, 0}, FullDocInfos);
> -btree_by_id_reduce(rereduce, Reds) ->
> - lists:foldl(
> - fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSize})
> ->
> - % pre 1.2 format, will be upgraded on compaction
> - {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil};
> - ({NotDeleted, Deleted, Size}, {AccNotDeleted, AccDeleted,
> AccSize}) ->
> - AccSize2 = sum_leaf_sizes(AccSize, Size),
> - {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSize2}
> - end,
> - {0, 0, 0}, Reds).
> -
> -sum_leaf_sizes(nil, _) ->
> - nil;
> -sum_leaf_sizes(_, nil) ->
> - nil;
> -sum_leaf_sizes(Size1, Size2) ->
> - Size1 + Size2.
> + lists:foldl(fun
> + (#full_doc_info{deleted=false, sizes=Sizes}, {NotDel, Del, SAcc})
> ->
> + {NotDel + 1, Del, reduce_sizes(Sizes, SAcc)};
> + (#full_doc_info{deleted=true, sizes=Sizes}, {NotDel, Del, SAcc}) ->
> + {NotDel, Del + 1, reduce_sizes(Sizes, SAcc)}
> + end, {0, 0, {0, 0}}, FullDocInfos);
> +btree_by_id_reduce(rereduce, Reductions) ->
> + lists:foldl(fun
> + ({NotDel, Del}, {NDAcc, DAcc, SAcc}) ->
> + {NotDel + NDAcc, Del + DAcc, SAcc};
> + ({NotDel, Del, Sizes}, {NDAcc, DAcc, SAcc}) ->
> + {NotDel + NDAcc, Del + DAcc, reduce_sizes(Sizes, SAcc)}
> + end, {0, 0, {0, 0}}, Reductions).
> +
> +reduce_sizes({A1, E1}, {A2, E2}) ->
> + {A1 + A2, E1 + E2};
> +reduce_sizes(S, {_, _} = Acc) when is_integer(S) ->
> + reduce_sizes({S, 0}, Acc).
>
> btree_by_seq_reduce(reduce, DocInfos) ->
> % count the number of documents
> @@ -549,10 +579,15 @@ flush_trees(_Db, [], AccFlushedTrees) ->
> flush_trees(#db{fd = Fd} = Db,
> [InfoUnflushed | RestUnflushed], AccFlushed) ->
> #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} =
> InfoUnflushed,
> - {Flushed, LeafsSize} = couch_key_tree:mapfold(
> + {Flushed, FinalAcc} = couch_key_tree:mapfold(
> fun(_Rev, Value, Type, Acc) ->
> case Value of
> - #doc{deleted = IsDeleted, body = {summary, Summary, AttsFd}} ->
> + #doc{} = Doc ->
> + #doc{
> + deleted = IsDeleted,
> + body = {summary, Summary, AttsFd},
> + atts = Atts
> + } = Doc,
> % this node value is actually an unwritten document
> summary,
> % write to disk.
> % make sure the Fd in the written bins is the same Fd we
> are
> @@ -571,31 +606,44 @@ flush_trees(#db{fd = Fd} = Db,
> " changed. Possibly retrying.", []),
> throw(retry)
> end,
> - {ok, NewSummaryPointer, SummarySize} =
> - couch_file:append_raw_chunk(Fd, Summary),
> - TotalSize = lists:foldl(
> - fun(#att{att_len = L}, A) -> A + L end,
> - SummarySize, Value#doc.atts),
> - NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
> - seq=UpdateSeq, size=TotalSize},
> - case Type of
> - leaf ->
> - {NewValue, Acc + TotalSize};
> - branch ->
> - {NewValue, Acc}
> - end;
> - {_, _, _, LeafSize} when Type =:= leaf, LeafSize =/= nil ->
> - {Value, Acc + LeafSize};
> - _ ->
> + AttsInfo = lists:usort([
> + {P, L} || #att{data = {_, P}, att_len = L} <- Atts
> + ]),
> + [_, _, SummaryBin] = Summary,
> + ExternalSize =
> couch_compress:uncompressed_length(SummaryBin),
> + {ok, NewPtr, ActiveSize}
> + = couch_file:append_raw_chunk(Fd, Summary),
> + Leaf = #leaf{
> + deleted = IsDeleted,
> + ptr = NewPtr,
> + seq = UpdateSeq,
> + sizes = {ActiveSize, ExternalSize},
> + atts = AttsInfo
> + },
> + {Leaf, add_sizes(Type, Leaf, Acc)};
> + #leaf{} = Leaf ->
> + {Value, add_sizes(Type, Leaf, Acc)};
> + ?REV_MISSING ->
> {Value, Acc}
> end
> - end, 0, Unflushed),
> + end, {0, 0, []}, Unflushed),
> + {FinalAS, FinalES, FinalAtts} = FinalAcc,
> + TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
> InfoFlushed = InfoUnflushed#full_doc_info{
> rev_tree = Flushed,
> - leafs_size = LeafsSize
> + sizes = {FinalAS + TotalAttSize, FinalES + TotalAttSize}
> },
> flush_trees(Db, RestUnflushed, [InfoFlushed | AccFlushed]).
>
> +add_sizes(branch, _, Acc) ->
> + Acc;
> +add_sizes(leaf, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
> + {ActiveSize, ExternalSize} = upgrade_sizes(Sizes),
> + {ASAcc, ESAcc, AttsAcc} = Acc,
> + NewASAcc = ActiveSize + ASAcc,
> + NewESAcc = ExternalSize + ESAcc,
> + NewAttsAcc = lists:umerge(AttSizes, AttsAcc),
> + {NewASAcc, NewESAcc, NewAttsAcc}.
>
> send_result(Client, Ref, NewResult) ->
> % used to send a result to the client
> @@ -896,23 +944,34 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos,
> Retry) ->
> A =< B
> end, merge_lookups(MixedInfos, LookupResults)),
>
> - NewInfos1 = lists:map(
> - fun(#full_doc_info{rev_tree=RevTree}=Info) ->
> - Info#full_doc_info{rev_tree=couch_key_tree:map(
> - fun(_, _, branch) ->
> - ?REV_MISSING;
> - (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
> - {_Body, AttsInfo} = Summary = copy_doc_attachments(
> - Db, Sp, DestFd),
> - SummaryChunk = make_doc_summary(NewDb, Summary),
> - {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
> - DestFd, SummaryChunk),
> - TotalLeafSize = lists:foldl(
> - fun({_, _, _, AttLen, _, _, _, _}, S) -> S +
> AttLen end,
> - SummarySize, AttsInfo),
> - Leaf#leaf{ptr=Pos, size=TotalLeafSize}
> - end, RevTree)}
> - end, NewInfos0),
> + NewInfos1 = lists:map(fun(Info) ->
> + {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
> + (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
> + {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
> + Summary = make_doc_summary(NewDb, {Body, AttInfos}),
> + [_, _, SummaryBin] = Summary,
> + ExternalSize =
> couch_compress:uncompressed_length(SummaryBin),
> + {ok, Pos, ActiveSize}
> + = couch_file:append_raw_chunk(DestFd, Summary),
> + AttSizes = [{element(3, A), element(4, A)} || A <-
> AttInfos],
> + NewLeaf = Leaf#leaf{
> + ptr = Pos,
> + sizes = {ActiveSize, ExternalSize},
> + atts = lists:usort(AttSizes)
> + },
> + {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)};
> + (_Rev, _Value, branch, SizesAcc) ->
> + {?REV_MISSING, SizesAcc}
> + end, {0, 0, []}, Info#full_doc_info.rev_tree),
> + {FinalAS, FinalES, FinalAtts} = FinalAcc,
> + TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0,
> FinalAtts),
> + NewActiveSize = FinalAS + TotalAttSize,
> + NewExternalSize = FinalES + TotalAttSize,
> + Info#full_doc_info{
> + rev_tree = NewRevTree,
> + sizes = {NewActiveSize, NewExternalSize}
> + }
> + end, NewInfos0),
>
> NewInfos = stem_full_doc_infos(Db, NewInfos1),
> RemoveSeqs =
>
>
>
>
> --
> Andy Wenk
> Hamburg - Germany
> RockIt!
>
> http://www.couchdb-buch.de
> http://www.pg-praxisbuch.de
>
> GPG fingerprint: C044 8322 9E12 1483 4FEC 9452 B65D 6BE3 9ED3 9588
>
> https://people.apache.org/keys/committer/andywenk.asc