You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2014/02/05 00:06:40 UTC

[27/50] couch commit: updated refs/heads/import to c3116d7

Introduce a #leaf{} record in revision trees

This is substantially based on work by Bob Dionne (a452a4a) to introduce
the data size calculations at Cloudant. There's quite a bit of conflict
in code and actual behavior between this work and what Filipe wrote for
CouchDB. This new record should make it easier to merge both
behaviors.

An important thing to note is that this record only ever lives in RAM and
is never written to disk, so we don't have to worry about record upgrades.
We will, however, have to maintain the upgrade info that Filipe and Bob
both introduced (which is fairly straightforward).


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/15b84c05
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/15b84c05
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/15b84c05

Branch: refs/heads/import
Commit: 15b84c051eca732be0644e6af7a3a134bbf329b5
Parents: 22fdbe2
Author: Robert Newson <rn...@apache.org>
Authored: Sun Mar 10 16:12:28 2013 -0500
Committer: Paul J. Davis <pa...@gmail.com>
Committed: Tue Feb 4 17:03:24 2014 -0600

----------------------------------------------------------------------
 include/couch_db.hrl     |  9 +++++++++
 src/couch_db.erl         | 22 +++++++---------------
 src/couch_db_updater.erl | 40 +++++++++++++++++-----------------------
 src/couch_doc.erl        | 13 ++++++++-----
 src/couch_util.erl       | 11 +++++++++++
 5 files changed, 52 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 77006e4..61a59f7 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -23,6 +23,8 @@
 
 -define(b2l(V), binary_to_list(V)).
 -define(l2b(V), list_to_binary(V)).
+-define(i2b(V), couch_util:integer_to_boolean(V)).
+-define(b2i(V), couch_util:boolean_to_integer(V)).
 -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
 -define(term_size(T),
     try
@@ -271,3 +273,10 @@
     stop_fun
 }).
 
+-record(leaf,  {
+    deleted,
+    ptr,
+    seq,
+    size = nil
+}).
+

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 7734c7c..e4e8cca 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -580,7 +580,7 @@ prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc,
     case Revs of
     [PrevRev|_] ->
         case dict:find({RevStart, PrevRev}, LeafRevsDict) of
-        {ok, {Deleted, DiskSp, DiskRevs}} ->
+        {ok, {#leaf{deleted=Deleted, ptr=DiskSp}, DiskRevs}} ->
             case couch_doc:has_stubs(Doc) of
             true ->
                 DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs),
@@ -643,12 +643,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
         AllowConflict, AccPrepped, AccErrors) ->
     Leafs = couch_key_tree:get_all_leafs(OldRevTree),
     LeafRevsDict = dict:from_list([
-        begin
-            Deleted = element(1, LeafVal),
-            Sp = element(2, LeafVal),
-            {{Start, RevId}, {Deleted, Sp, Revs}}
-        end ||
-        {LeafVal, {Start, [RevId | _]} = Revs} <- Leafs
+        {{Start, RevId}, {Leaf, Revs}} ||
+        {Leaf, {Start, [RevId | _]} = Revs} <- Leafs
     ]),
     {PreppedBucket, AccErrors3} = lists:foldl(
         fun({Doc, Ref}, {Docs2Acc, AccErrors2}) ->
@@ -895,9 +891,7 @@ make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #doc{}} | RestPath]) ->
     make_first_doc_on_disk(Db, Id, Pos-1, RestPath);
 make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) ->
     make_first_doc_on_disk(Db, Id, Pos - 1, RestPath);
-make_first_doc_on_disk(Db, Id, Pos, [{_Rev, RevValue} |_]=DocPath) ->
-    IsDel = element(1, RevValue),
-    Sp = element(2, RevValue),
+make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) ->
     Revs = [Rev || {Rev, _} <- DocPath],
     make_doc(Db, Id, IsDel, Sp, {Pos, Revs}).
 
@@ -1243,9 +1237,7 @@ open_doc_revs_int(Db, IdRevs, Options) ->
                     ?REV_MISSING ->
                         % we have the rev in our list but know nothing about it
                         {{not_found, missing}, {Pos, Rev}};
-                    RevValue ->
-                        IsDeleted = element(1, RevValue),
-                        SummaryPtr = element(2, RevValue),
+                    #leaf{deleted=IsDeleted, ptr=SummaryPtr} ->
                         {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
                     end
                 end, FoundRevs),
@@ -1297,8 +1289,8 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre
         [{revs_info, Pos, lists:map(
             fun({Rev1, ?REV_MISSING}) ->
                 {Rev1, missing};
-            ({Rev1, RevValue}) ->
-                case element(1, RevValue) of
+            ({Rev1, Leaf}) ->
+                case Leaf#leaf.deleted of
                 true ->
                     {Rev1, deleted};
                 false ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index c64911e..cc48ef8 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -158,10 +158,8 @@ handle_call({purge_docs, IdRevs}, _From, Db) ->
     {DocInfoToUpdate, NewSeq} = lists:mapfoldl(
         fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
             Tree2 = couch_key_tree:map_leafs(
-                fun(_RevId, LeafVal) ->
-                    IsDeleted = element(1, LeafVal),
-                    BodyPointer = element(2, LeafVal),
-                    {IsDeleted, BodyPointer, SeqAcc + 1}
+                fun(_RevId, Leaf) ->
+                    Leaf#leaf{seq=SeqAcc+1}
                 end, Tree),
             {FullInfo#full_doc_info{rev_tree=Tree2}, SeqAcc + 1}
         end, LastSeq, FullDocInfoToUpdate),
@@ -340,37 +338,35 @@ rev_tree(DiskTree) ->
     couch_key_tree:mapfold(fun
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
             % pre 1.2 format, will be upgraded on compaction
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, nil};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, nil};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, Acc};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, Acc};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
             Acc2 = sum_leaf_sizes(Acc, Size),
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc2};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc2};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc};
         (_RevId, ?REV_MISSING, _Type, Acc) ->
             {?REV_MISSING, Acc}
-    end, DiskTree).
+    end, 0, DiskTree).
 
 disk_tree(RevTree) ->
     couch_key_tree:map(fun
         (_RevId, ?REV_MISSING) ->
             ?REV_MISSING;
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
-            {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, nil};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}) ->
-            {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, Size}
+        (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, size=Size}) ->
+            {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
     end, RevTree).
 
 btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) ->
-    {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}.
+    {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
 
 btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
     {RevTree, LeafsSize} = rev_tree(DiskTree),
     #full_doc_info{
         id = Id,
         update_seq = Seq,
-        deleted = (Del == 1),
+        deleted = ?i2b(Del),
         rev_tree = RevTree,
         leafs_size = LeafsSize
     };
@@ -388,14 +384,14 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
 
 btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
         deleted=Deleted, rev_tree=Tree}) ->
-    {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}.
+    {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
 
 btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
     {Tree, LeafsSize} = rev_tree(DiskTree),
     #full_doc_info{
         id = Id,
         update_seq = HighSeq,
-        deleted = (Deleted == 1),
+        deleted = ?i2b(Deleted),
         rev_tree = Tree,
         leafs_size = LeafsSize
     }.
@@ -573,7 +569,8 @@ flush_trees(#db{fd = Fd} = Db,
                 TotalSize = lists:foldl(
                     fun(#att{att_len = L}, A) -> A + L end,
                     SummarySize, Value#doc.atts),
-                NewValue = {IsDeleted, NewSummaryPointer, UpdateSeq, TotalSize},
+                NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
+                                 seq=UpdateSeq, size=TotalSize},
                 case Type of
                 leaf ->
                     {NewValue, Acc + TotalSize};
@@ -899,10 +896,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
             Info#full_doc_info{rev_tree=couch_key_tree:map(
                 fun(_, _, branch) ->
                     ?REV_MISSING;
-                (_Rev, LeafVal, leaf) ->
-                    IsDel = element(1, LeafVal),
-                    Sp = element(2, LeafVal),
-                    Seq = element(3, LeafVal),
+                (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
                     {_Body, AttsInfo} = Summary = copy_doc_attachments(
                         Db, Sp, DestFd),
                     SummaryChunk = make_doc_summary(NewDb, Summary),
@@ -911,7 +905,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
                     TotalLeafSize = lists:foldl(
                         fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
                         SummarySize, AttsInfo),
-                    {IsDel, Pos, Seq, TotalLeafSize}
+                    Leaf#leaf{ptr=Pos, size=TotalLeafSize}
                 end, RevTree)}
         end, NewInfos0),
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index bcf79d2..6f2ca9b 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -330,7 +330,10 @@ max_seq(Tree, UpdateSeq) ->
             {_Deleted, _DiskPos, OldTreeSeq} ->
                 % Older versions didn't track data sizes.
                 erlang:max(MaxOldSeq, OldTreeSeq);
-            {_Deleted, _DiskPos, OldTreeSeq, _Size} ->
+            {_Deleted, _DiskPos, OldTreeSeq, _Size} -> % necessary clause?
+                % Older versions didn't store #leaf records.
+                erlang:max(MaxOldSeq, OldTreeSeq);
+            #leaf{seq=OldTreeSeq} ->
                 erlang:max(MaxOldSeq, OldTreeSeq);
             _ ->
                 MaxOldSeq
@@ -341,11 +344,11 @@ max_seq(Tree, UpdateSeq) ->
 to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) ->
     RevInfosAndPath = [
         {#rev_info{
-            deleted = element(1, LeafVal),
-            body_sp = element(2, LeafVal),
-            seq = element(3, LeafVal),
+            deleted = Leaf#leaf.deleted,
+            body_sp = Leaf#leaf.ptr,
+            seq = Leaf#leaf.seq,
             rev = {Pos, RevId}
-        }, Path} || {LeafVal, {Pos, [RevId | _]} = Path} <-
+        }, Path} || {Leaf, {Pos, [RevId | _]} = Path} <-
             couch_key_tree:get_all_leafs(Tree)
     ],
     SortedRevInfosAndPath = lists:sort(

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_util.erl
----------------------------------------------------------------------
diff --git a/src/couch_util.erl b/src/couch_util.erl
index 3556d36..d09211a 100644
--- a/src/couch_util.erl
+++ b/src/couch_util.erl
@@ -29,6 +29,7 @@
 -export([encode_doc_id/1]).
 -export([with_db/2]).
 -export([rfc1123_date/0, rfc1123_date/1]).
+-export([integer_to_boolean/1, boolean_to_integer/1]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -487,3 +488,13 @@ month(9) -> "Sep";
 month(10) -> "Oct";
 month(11) -> "Nov";
 month(12) -> "Dec".
+
+integer_to_boolean(1) ->
+    true;
+integer_to_boolean(0) ->
+    false.
+
+boolean_to_integer(true) ->
+    1;
+boolean_to_integer(false) ->
+    0.