You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by fd...@apache.org on 2011/05/13 13:44:23 UTC

svn commit: r1102684 - in /couchdb/trunk: etc/couchdb/default.ini.tpl.in src/couchdb/couch_db_updater.erl src/couchdb/couch_view_compactor.erl

Author: fdmanana
Date: Fri May 13 11:44:22 2011
New Revision: 1102684

URL: http://svn.apache.org/viewvc?rev=1102684&view=rev
Log:
Configurable database and view compaction parameters

These parameters are now configurable as byte quantities.
Larger buffer sizes can significantly decrease final file sizes and may,
in some cases, make the compaction process either faster or slower.

Closes COUCHDB-1142.


Modified:
    couchdb/trunk/etc/couchdb/default.ini.tpl.in
    couchdb/trunk/src/couchdb/couch_db_updater.erl
    couchdb/trunk/src/couchdb/couch_view_compactor.erl

Modified: couchdb/trunk/etc/couchdb/default.ini.tpl.in
URL: http://svn.apache.org/viewvc/couchdb/trunk/etc/couchdb/default.ini.tpl.in?rev=1102684&r1=1102683&r2=1102684&view=diff
==============================================================================
--- couchdb/trunk/etc/couchdb/default.ini.tpl.in (original)
+++ couchdb/trunk/etc/couchdb/default.ini.tpl.in Fri May 13 11:44:22 2011
@@ -20,6 +20,15 @@ uri_file = %localstaterundir%/couch.uri
 ;                lowest compression ratio) to 9 (slowest, highest compression ratio)
 file_compression = snappy
 
+[database_compaction]
+; larger buffer sizes can originate smaller files
+doc_buffer_size = 524288 ; value in bytes
+checkpoint_after = 5242880 ; checkpoint after every N bytes were written
+
+[view_compaction]
+; larger buffer sizes can originate smaller files
+keyvalue_buffer_size = 2097152 ; value in bytes
+
 [httpd]
 port = 5984
 bind_address = 127.0.0.1

Modified: couchdb/trunk/src/couchdb/couch_db_updater.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db_updater.erl?rev=1102684&r1=1102683&r2=1102684&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_db_updater.erl (original)
+++ couchdb/trunk/src/couchdb/couch_db_updater.erl Fri May 13 11:44:22 2011
@@ -878,32 +878,48 @@ copy_compact(Db, NewDb0, Retry) ->
     FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header],
     NewDb = NewDb0#db{fsync_options=FsyncOptions},
     TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
+    BufferSize = list_to_integer(
+        couch_config:get("database_compaction", "doc_buffer_size", "524288")),
+    CheckpointAfter = couch_util:to_integer(
+        couch_config:get("database_compaction", "checkpoint_after",
+            BufferSize * 10)),
+
     EnumBySeqFun =
-    fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) ->
-        couch_task_status:update("Copied ~p of ~p changes (~p%)",
-                [TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]),
-        if TotalCopied rem 1000 =:= 0 ->
-            NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
-            if TotalCopied rem 10000 =:= 0 ->
-                {ok, {commit_data(NewDb2#db{update_seq=Seq}), [], TotalCopied + 1}};
+    fun(#doc_info{high_seq=Seq}=DocInfo, _Offset,
+        {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) ->
+
+        AccUncopiedSize2 = AccUncopiedSize + byte_size(?term_to_bin(DocInfo)),
+        if AccUncopiedSize2 >= BufferSize ->
+            NewDb2 = copy_docs(
+                Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
+            TotalCopied2 = TotalCopied + 1 + length(AccUncopied),
+            couch_task_status:update("Copied ~p of ~p changes (~p%)",
+                [TotalCopied2, TotalChanges, (TotalCopied2 * 100) div TotalChanges]),
+            AccCopiedSize2 = AccCopiedSize + AccUncopiedSize2,
+            if AccCopiedSize2 >= CheckpointAfter ->
+                {ok, {commit_data(NewDb2#db{update_seq = Seq}), [],
+                    0, 0, TotalCopied2}};
             true ->
-                {ok, {NewDb2#db{update_seq=Seq}, [], TotalCopied + 1}}
+                {ok, {NewDb2#db{update_seq = Seq}, [],
+                    0, AccCopiedSize2, TotalCopied2}}
             end;
         true ->
-            {ok, {AccNewDb, [DocInfo | AccUncopied], TotalCopied + 1}}
+            {ok, {AccNewDb, [DocInfo | AccUncopied], AccUncopiedSize2,
+                AccCopiedSize, TotalCopied}}
         end
     end,
 
     couch_task_status:set_update_frequency(500),
 
-    {ok, _, {NewDb2, Uncopied, TotalChanges}} =
+    {ok, _, {NewDb2, Uncopied, _, _, ChangesDone}} =
         couch_btree:foldl(Db#db.docinfo_by_seq_btree, EnumBySeqFun,
-            {NewDb, [], 0},
+            {NewDb, [], 0, 0, 0},
             [{start_key, NewDb#db.update_seq + 1}]),
 
     couch_task_status:update("Flushing"),
 
     NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry),
+    TotalChanges = ChangesDone + length(Uncopied),
 
     % copy misc header values
     if NewDb3#db.security /= Db#db.security ->

Modified: couchdb/trunk/src/couchdb/couch_view_compactor.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_view_compactor.erl?rev=1102684&r1=1102683&r2=1102684&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_view_compactor.erl (original)
+++ couchdb/trunk/src/couchdb/couch_view_compactor.erl Fri May 13 11:44:22 2011
@@ -48,27 +48,31 @@ compact_group(Group, EmptyGroup) ->
     DbName = couch_db:name(Db),
     TaskName = <<DbName/binary, ShortName/binary>>,
     couch_task_status:add_task(<<"View Group Compaction">>, TaskName, <<"">>),
+    BufferSize = list_to_integer(
+        couch_config:get("view_compaction", "keyvalue_buffer_size", "2097152")),
 
-    Fun = fun({DocId, _ViewIdKeys} = KV, {Bt, Acc, TotalCopied, LastId}) ->
+    Fun = fun({DocId, _ViewIdKeys} = KV,
+            {Bt, Acc, AccSize, TotalCopied, LastId}) ->
         if DocId =:= LastId -> % COUCHDB-999
             Msg = "Duplicates of ~s detected in ~s ~s - rebuild required",
             exit(io_lib:format(Msg, [DocId, DbName, GroupId]));
         true -> ok end,
-        if TotalCopied rem 10000 =:= 0 ->
+        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
+        if AccSize2 >= BufferSize ->
+            {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
             couch_task_status:update("Copied ~p of ~p Ids (~p%)",
                 [TotalCopied, Count, (TotalCopied*100) div Count]),
-            {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
-            {ok, {Bt2, [], TotalCopied+1, DocId}};
+            {ok, {Bt2, [], 0, TotalCopied + 1 + length(Acc), DocId}};
         true ->
-            {ok, {Bt, [KV|Acc], TotalCopied+1, DocId}}
+            {ok, {Bt, [KV|Acc], AccSize2, TotalCopied, DocId}}
         end
     end,
-    {ok, _, {Bt3, Uncopied, _Total, _LastId}} = couch_btree:foldl(IdBtree, Fun,
-        {EmptyIdBtree, [], 0, nil}),
+    {ok, _, {Bt3, Uncopied, _, _Total, _LastId}} = couch_btree:foldl(
+        IdBtree, Fun, {EmptyIdBtree, [], 0, 0, nil}),
     {ok, NewIdBtree} = couch_btree:add(Bt3, lists:reverse(Uncopied)),
 
     NewViews = lists:map(fun({View, EmptyView}) ->
-        compact_view(View, EmptyView)
+        compact_view(View, EmptyView, BufferSize)
     end, lists:zip(Views, EmptyViews)),
 
     NewGroup = EmptyGroup#group{
@@ -81,23 +85,25 @@ compact_group(Group, EmptyGroup) ->
     gen_server:cast(Pid, {compact_done, NewGroup}).
 
 %% @spec compact_view(View, EmptyView, Retry) -> CompactView
-compact_view(View, EmptyView) ->
+compact_view(View, EmptyView, BufferSize) ->
     {ok, Count} = couch_view:get_row_count(View),
 
     %% Key is {Key,DocId}
-    Fun = fun(KV, {Bt, Acc, TotalCopied}) ->
-        if TotalCopied rem 10000 =:= 0 ->
-            couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)",
-                [View#view.id_num, TotalCopied, Count, (TotalCopied*100) div Count]),
+    Fun = fun(KV, {Bt, Acc, AccSize, TotalCopied}) ->
+        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
+        if AccSize2 >= BufferSize ->
             {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
-            {ok, {Bt2, [], TotalCopied + 1}};
+            couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)",
+                [View#view.id_num, TotalCopied, Count,
+                    (TotalCopied*100) div Count]),
+            {ok, {Bt2, [], 0, TotalCopied + 1 + length(Acc)}};
         true ->
-            {ok, {Bt, [KV|Acc], TotalCopied + 1}}
+            {ok, {Bt, [KV|Acc], AccSize2, TotalCopied}}
         end
     end,
 
-    {ok, _, {Bt3, Uncopied, _Total}} = couch_btree:foldl(View#view.btree, Fun,
-        {EmptyView#view.btree, [], 0}),
+    {ok, _, {Bt3, Uncopied, _, _Total}} = couch_btree:foldl(
+        View#view.btree, Fun, {EmptyView#view.btree, [], 0, 0}),
     {ok, NewBt} = couch_btree:add(Bt3, lists:reverse(Uncopied)),
     EmptyView#view{btree = NewBt}.