You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2008/04/23 02:25:29 UTC

svn commit: r650705 - in /incubator/couchdb/trunk: share/www/ src/couchdb/

Author: damien
Date: Tue Apr 22 17:25:23 2008
New Revision: 650705

URL: http://svn.apache.org/viewvc?rev=650705&view=rev
Log:
Replicator optimizations and fix for unnecessary document copy during re-replication

Modified:
    incubator/couchdb/trunk/share/www/replicator.html
    incubator/couchdb/trunk/src/couchdb/couch.app.tpl.in
    incubator/couchdb/trunk/src/couchdb/couch_db.erl
    incubator/couchdb/trunk/src/couchdb/couch_httpd.erl
    incubator/couchdb/trunk/src/couchdb/couch_key_tree.erl
    incubator/couchdb/trunk/src/couchdb/couch_rep.erl
    incubator/couchdb/trunk/src/couchdb/couch_server_sup.erl
    incubator/couchdb/trunk/src/couchdb/couch_util.erl

Modified: incubator/couchdb/trunk/share/www/replicator.html
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/share/www/replicator.html?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/share/www/replicator.html [utf-8] (original)
+++ incubator/couchdb/trunk/share/www/replicator.html [utf-8] Tue Apr 22 17:25:23 2008
@@ -82,12 +82,8 @@
           $.couch.replicate(source, target, {
             success: function(resp) {
               $.each(resp.history, function(idx, record) {
-                $("<tr><th></th><td class='seq'></td>" +
-                  "<td class='read'></td><td class='copied'></td></tr>")
-                  .find("th").text(record.start_time).end()
-                  .find("td.seq").text(record.start_last_seq + "–" + record.end_last_seq).end()
-                  .find("td.read").text(record.docs_read + " (" + record.read_errors + " errors)").end()
-                  .find("td.copied").text(record.docs_copied + " (" + record.copy_errors + " errors)").end()
+                $("<tr><th></th></tr>")
+                  .find("th").text(JSON.stringify(record)).end()
                   .appendTo("#records tbody.content");
               });
               $("#records tbody tr").removeClass("odd").filter(":odd").addClass("odd");
@@ -136,10 +132,7 @@
       <table id="records" class="listing">
         <caption>Replication History</caption>
         <thead><tr>
-          <th>When</th>
-          <th>Sequences</th>
-          <th>Documents read</th>
-          <th>Documents copied</th>
+          <th>Event</th>
         </tr></thead>
         <tbody class="content"></tbody>
         <tbody class="footer"><tr>
@@ -148,5 +141,6 @@
       </table>
 
     </div>
+    <div id="dump"></div>
   </body>
 </html>

Modified: incubator/couchdb/trunk/src/couchdb/couch.app.tpl.in
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch.app.tpl.in?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch.app.tpl.in (original)
+++ incubator/couchdb/trunk/src/couchdb/couch.app.tpl.in Tue Apr 22 17:25:23 2008
@@ -21,7 +21,6 @@
                         couch_rep]},
               {registered,[couch_server,
                            couch_server_sup,
-                           couch_util,
                            couch_view,
                            couch_query_servers,
                            couch_ft_query]},

Modified: incubator/couchdb/trunk/src/couchdb/couch_db.erl
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch_db.erl?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch_db.erl (original)
+++ incubator/couchdb/trunk/src/couchdb/couch_db.erl Tue Apr 22 17:25:23 2008
@@ -133,7 +133,8 @@
     end.
 
 open_doc_revs(MainPid, Id, Revs, Options) ->
-    open_doc_revs_int(get_db(MainPid), Id, Revs, Options).
+    [Result] = open_doc_revs_int(get_db(MainPid), [{Id, Revs}], Options),
+    Result.
 
 get_missing_revs(MainPid, IdRevsList) ->
     Ids = [Id1 || {Id1, _Revs} <- IdRevsList],
@@ -148,7 +149,9 @@
             end
         end,
         IdRevsList, FullDocInfoResults),
-    {ok, Results}.
+    % strip out the non-missing ids
+    Missing = [{Id, Revs} || {Id, Revs} <- Results, Revs /= []],
+    {ok, Missing}.
 
 get_doc_info(Db, Id) ->
     case get_full_doc_info(Db, Id) of
@@ -563,38 +566,44 @@
     {ok, Db} = gen_server:call(MainPid, get_db),
     Db.
 
-open_doc_revs_int(Db, Id, Revs, Options) ->
-    case get_full_doc_info(Db, Id) of
-    {ok, #full_doc_info{rev_tree=RevTree}} ->
-        {FoundRevs, MissingRevs} =
-        case Revs of
-        all ->
-            {couch_key_tree:get_all_leafs(RevTree), []};
-        _ ->
-            case lists:member(latest, Options) of
-            true ->
-                couch_key_tree:get_key_leafs(RevTree, Revs);
-            false ->
-                couch_key_tree:get(RevTree, Revs)
+open_doc_revs_int(Db, IdRevs, Options) ->
+    Ids = [Id || {Id, _Revs} <- IdRevs],
+    LookupResults = get_full_doc_infos(Db, Ids),
+    lists:zipwith(
+        fun({Id, Revs}, Lookup) ->
+            case Lookup of
+            {ok, #full_doc_info{rev_tree=RevTree}} ->
+                {FoundRevs, MissingRevs} =
+                case Revs of
+                all ->
+                    {couch_key_tree:get_all_leafs(RevTree), []};
+                _ ->
+                    case lists:member(latest, Options) of
+                    true ->
+                        couch_key_tree:get_key_leafs(RevTree, Revs);
+                    false ->
+                        couch_key_tree:get(RevTree, Revs)
+                    end
+                end,
+                FoundResults =
+                lists:map(fun({Rev, Value, FoundRevPath}) ->
+                    case Value of
+                    ?REV_MISSING ->
+                        % we have the rev in our list but know nothing about it
+                        {{not_found, missing}, Rev};
+                    {IsDeleted, SummaryPtr} ->
+                        {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
+                    end
+                end, FoundRevs),
+                Results = FoundResults ++ [{{not_found, missing}, MissingRev} || MissingRev <- MissingRevs],
+                {ok, Results};
+            not_found when Revs == all ->
+                {ok, []};
+            not_found ->
+                {ok, [{{not_found, missing}, Rev} || Rev <- Revs]}
             end
         end,
-        FoundResults =
-        lists:map(fun({Rev, Value, FoundRevPath}) ->
-            case Value of
-            ?REV_MISSING ->
-                % we have the rev in our list but know nothing about it
-                {{not_found, missing}, Rev};
-            {IsDeleted, SummaryPtr} ->
-                {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
-            end
-        end, FoundRevs),
-        Results = FoundResults ++ [{{not_found, missing}, MissingRev} || MissingRev <- MissingRevs],
-        {ok, Results};
-    not_found when Revs == all ->
-        {ok, []};
-    not_found ->
-        {ok, [{{not_found, missing}, Rev} || Rev <- Revs]}
-    end.
+        IdRevs, LookupResults).
 
 open_doc_int(Db, ?LOCAL_DOC_PREFIX ++ _ = Id, _Options) ->
     case couch_btree:lookup(Db#db.local_docs_btree, [Id]) of

Modified: incubator/couchdb/trunk/src/couchdb/couch_httpd.erl
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch_httpd.erl?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch_httpd.erl (original)
+++ incubator/couchdb/trunk/src/couchdb/couch_httpd.erl Tue Apr 22 17:25:23 2008
@@ -76,9 +76,9 @@
     ]).
 
 handle_request(Req, DocumentRoot, Method, Path) ->
-    Start = erlang:now(),
+    % Start = erlang:now(),
     X = handle_request0(Req, DocumentRoot, Method, Path),
-    io:format("now_diff:~p~n", [timer:now_diff(erlang:now(), Start)]),
+    % io:format("now_diff:~p~n", [timer:now_diff(erlang:now(), Start)]),
     X.
     
 handle_request0(Req, DocumentRoot, Method, Path) ->

Modified: incubator/couchdb/trunk/src/couchdb/couch_key_tree.erl
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch_key_tree.erl?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch_key_tree.erl (original)
+++ incubator/couchdb/trunk/src/couchdb/couch_key_tree.erl Tue Apr 22 17:25:23 2008
@@ -52,7 +52,7 @@
 find_missing([], Keys) ->
     Keys;
 find_missing([{Key, _, SubTree} | RestTree], Keys) ->
-    SrcKeys2 = Keys -- Key,
+    SrcKeys2 = Keys -- [Key],
     SrcKeys3 = find_missing(SubTree, SrcKeys2),
     find_missing(RestTree, SrcKeys3).
     

Modified: incubator/couchdb/trunk/src/couchdb/couch_rep.erl
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch_rep.erl?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch_rep.erl (original)
+++ incubator/couchdb/trunk/src/couchdb/couch_rep.erl Tue Apr 22 17:25:23 2008
@@ -16,14 +16,6 @@
 
 -export([replicate/2, replicate/3]).
 
--record(stats, {
-    docs_read=0,
-    read_errors=0,
-    docs_copied=0,
-    copy_errors=0
-    }).
-
-
 url_encode([H|T]) ->
     if
     H >= $a, $z >= H ->
@@ -87,7 +79,8 @@
     false -> SeqNum0
     end,
 
-    {NewSeqNum, Stats} = pull_rep(DbTgt, DbSrc, SeqNum, #stats{}),
+    {NewSeqNum, Stats} = pull_rep(DbTgt, DbSrc, SeqNum),
+    
     case NewSeqNum == SeqNum andalso OldRepHistoryProps /= [] of
     true ->
         % nothing changed, don't record results
@@ -98,11 +91,7 @@
                 [{"start_time", StartTime},
                 {"end_time", httpd_util:rfc1123_date()},
                 {"start_last_seq", SeqNum},
-                {"end_last_seq", NewSeqNum},
-                {"docs_read", Stats#stats.docs_read},
-                {"read_errors", Stats#stats.read_errors},
-                {"docs_copied", Stats#stats.docs_copied},
-                {"copy_errors", Stats#stats.copy_errors}]}
+                {"end_last_seq", NewSeqNum} | Stats]}
             | tuple_to_list(proplists:get_value("history", OldRepHistoryProps, {}))],
         % something changed, record results
         NewRepHistory =
@@ -116,34 +105,89 @@
         {ok, NewRepHistory}
     end.
 
-pull_rep(DbTarget, DbSource, SourceSeqNum, Stats) ->
+pull_rep(DbTarget, DbSource, SourceSeqNum) ->
+    Parent = self(),
+    SaveDocsPid = spawn_link(fun() ->
+            save_docs_loop(Parent, DbTarget, 0) end),
+    OpenDocsPid = spawn_link(fun() ->
+            open_doc_revs_loop(Parent, DbSource, SaveDocsPid, 0) end),
+    MissingRevsPid = spawn_link(fun() ->
+            get_missing_revs_loop(Parent, DbTarget, OpenDocsPid, 0, 0) end),
     {ok, NewSeq} = enum_docs_since(DbSource, SourceSeqNum,
-        fun(#doc_info{update_seq=Seq}=SrcDocInfo, _, {_, AccStats}) ->
-            Stats2 = maybe_save_docs(DbTarget, DbSource, SrcDocInfo, AccStats),
-            {ok, {Seq, Stats2}}
-        end, {SourceSeqNum, Stats}),
-    NewSeq.
-
-
-maybe_save_docs(DbTarget, DbSource,
-        #doc_info{id=Id, rev=Rev, conflict_revs=Conflicts, deleted_conflict_revs=DelConflicts},
-        Stats) ->
-    SrcRevs = [Rev | Conflicts] ++ DelConflicts,
-    {ok, [{Id, MissingRevs}]} = get_missing_revs(DbTarget, [{Id, SrcRevs}]),
-
-    case MissingRevs of
-    [] ->
-        Stats;
-    _Else ->
+        fun(SrcDocInfo, _, _) ->
+            #doc_info{id=Id,
+                rev=Rev,
+                conflict_revs=Conflicts,
+                deleted_conflict_revs=DelConflicts,
+                update_seq=Seq} = SrcDocInfo,
+            SrcRevs = [Rev | Conflicts] ++ DelConflicts,
+            MissingRevsPid !  {Id, SrcRevs}, % send to the missing revs process
+            {ok, Seq}
+        end, SourceSeqNum),
+    MissingRevsPid ! shutdown,
+    receive {done, MissingRevsPid, Stats1} -> ok end,
+    
+    OpenDocsPid ! shutdown,
+    receive {done, OpenDocsPid, Stats2} -> ok end,
+    
+    SaveDocsPid ! shutdown,
+    receive {done, SaveDocsPid, Stats3} -> ok end,
+    
+    {NewSeq, Stats1 ++ Stats2 ++ Stats3}.
+
+
+receive_id_revs() ->
+    receive
+    {Id, Revs} ->
+        [{Id, Revs} | receive_id_revs()]
+    after 1 ->
+        []
+    end.
+
+get_missing_revs_loop(Parent, DbTarget, OpenDocsPid, RevsChecked, MissingFound) ->
+    receive
+    {Id, Revs} ->
+        Changed = [{Id, Revs} | receive_id_revs()],
+        {ok, Missing} = get_missing_revs(DbTarget, Changed),
+        [OpenDocsPid ! {Id0, MissingRevs} || {Id0, MissingRevs} <- Missing],
+        get_missing_revs_loop(Parent, DbTarget, OpenDocsPid,
+                RevsChecked + length(Changed),
+                MissingFound + length(Missing));
+    shutdown ->
+        Parent ! {done, self(), [{missing_checked, RevsChecked},
+                                 {missing_found, MissingFound}]}
+    end.
+    
+
+open_doc_revs_loop(Parent, DbSource, SaveDocsPid, DocsRead) ->
+    receive
+    {Id, MissingRevs} ->
         {ok, DocResults} = open_doc_revs(DbSource, Id, MissingRevs, [latest]),
         % only save successful reads
         Docs = [RevDoc || {ok, RevDoc} <- DocResults],
-        ok = save_docs(DbTarget, Docs, []),
+        SaveDocsPid ! Docs,
+        open_doc_revs_loop(Parent, DbSource, SaveDocsPid, DocsRead + length(Docs));
+    shutdown ->
+        Parent ! {done, self(), [{docs_read, DocsRead}]}
+    end.
+
 
-        Stats#stats{
-            docs_read=Stats#stats.docs_read + length(Docs),
-            read_errors=Stats#stats.read_errors + length(DocResults) - length(Docs),
-            docs_copied=Stats#stats.docs_copied + length(Docs)}
+receive_docs() ->
+    receive
+    Docs when is_list(Docs) ->
+        Docs ++ receive_docs()
+    after 1 ->
+        []
+    end.
+        
+save_docs_loop(Parent, DbTarget, DocsWritten) ->
+    receive
+    Docs0 when is_list(Docs0)  ->
+        Docs = Docs0 ++ receive_docs(),
+        ok = save_docs(DbTarget, Docs, []),
+        save_docs_loop(Parent, DbTarget, DocsWritten + length(Docs));
+    shutdown ->
+        Parent ! {done, self(), [{docs_written, DocsWritten}]}
     end.
 
 

Modified: incubator/couchdb/trunk/src/couchdb/couch_server_sup.erl
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch_server_sup.erl?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch_server_sup.erl (original)
+++ incubator/couchdb/trunk/src/couchdb/couch_server_sup.erl Tue Apr 22 17:25:23 2008
@@ -97,12 +97,6 @@
             brutal_kill,
             worker,
             [couch_server]},
-        {couch_util,
-            {couch_util, start_link, [UtilDriverDir]},
-            permanent,
-            brutal_kill,
-            worker,
-            [couch_util]},
         {couch_query_servers,
             {couch_query_servers, start_link, [QueryServers]},
             permanent,
@@ -146,6 +140,7 @@
     io:format("couch ~s (LogLevel=~s)~n", [couch_server:get_version(), LogLevel]),
     io:format("~s~n", [ConsoleStartupMsg]),
     
+    couch_util:start_driver(UtilDriverDir),
     
     % ensure these applications are running
     application:start(inets),

Modified: incubator/couchdb/trunk/src/couchdb/couch_util.erl
URL: http://svn.apache.org/viewvc/incubator/couchdb/trunk/src/couchdb/couch_util.erl?rev=650705&r1=650704&r2=650705&view=diff
==============================================================================
--- incubator/couchdb/trunk/src/couchdb/couch_util.erl (original)
+++ incubator/couchdb/trunk/src/couchdb/couch_util.erl Tue Apr 22 17:25:23 2008
@@ -11,32 +11,25 @@
 % the License.
 
 -module(couch_util).
--behaviour(gen_server).
 
--export([start_link/0,start_link/1]).
+-export([start_driver/1]).
 -export([parse_ini/1,should_flush/0, should_flush/1]).
 -export([new_uuid/0, rand32/0, implode/2, collate/2, collate/3]).
--export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1, test/0]).
+-export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1]).
 -export([encodeBase64/1, decodeBase64/1]).
 
--export([init/1, terminate/2, handle_call/3]).
--export([handle_cast/2,code_change/3,handle_info/2]).
 
 % arbitrarily chosen amount of memory to use before flushing to disk
 -define(FLUSH_MAX_MEM, 10000000).
 
-start_link() ->
-    start_link("").
-
-start_link("") ->
-    start_link(filename:join(code:priv_dir(couch), "lib"));
-start_link(LibDir) ->
+start_driver("") ->
+    start_driver(filename:join(code:priv_dir(couch), "lib"));
+start_driver(LibDir) ->
     case erl_ddll:load_driver(LibDir, "couch_erl_driver") of
     ok -> ok;
     {error, already_loaded} -> ok;
-    {error, ErrorDesc} -> exit({error, ErrorDesc})
-    end,
-    gen_server:start_link({local, couch_util}, couch_util, [], []).
+    Error -> exit(Error)
+    end.
 
 
 new_uuid() ->
@@ -45,9 +38,7 @@
 to_hex([]) ->
     [];
 to_hex([H|T]) ->
-    Digit1 = H div 16,
-    Digit2 = H rem 16,
-    [to_digit(Digit1), to_digit(Digit2) | to_hex(T)].
+    [to_digit(H div 16), to_digit(H rem 16) | to_hex(T)].
 
 to_digit(N) when N < 10 ->
     $0 + N;
@@ -195,33 +186,6 @@
         end, {"", []}, Lines),
     {ok, lists:reverse(ParsedIniValues)}.
 
-init([]) ->
-    {A,B,C} = erlang:now(),
-    random:seed(A,B,C),
-    {ok, dummy_server}.
-
-terminate(_Reason, _Server) ->
-    ok.
-
-handle_call(rand32, _From, Server) ->
-    {reply, rand32_int(), Server}.
-
-handle_cast(_Msg, State) ->
-    {noreply,State}.
-
-code_change(_OldVsn, State, _Extra) ->
-    {ok, State}.
-
-handle_info(_Info, State) ->
-    {noreply, State}.
-
-
-
-
-
-rand32_int() ->
-    random:uniform(16#FFFFFFFF + 1) - 1.
-
 drv_port() ->
     case get(couch_drv_port) of
     undefined ->
@@ -331,7 +295,3 @@
     62*?st(C,43) + ?st(C,47) + (C-59)*?st(C,48) - 69*?st(C,65) - 6*?st(C,97).
 
 
-
-test() ->
-    start_link("debug"),
-    collate("a","b",[]).