You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2017/09/28 19:58:50 UTC

[couchdb] branch master updated (30fcd7b -> b756b77)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


    from 30fcd7b  Add convenience remsh bash script
     new 2684561  Clean up replicator logs
     new b756b77  Replace replication start multi-line log statement

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/couch_replicator_scheduler_job.erl         | 167 +++++++++++++++++----
 1 file changed, 141 insertions(+), 26 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
['"commits@couchdb.apache.org" <co...@couchdb.apache.org>'].

[couchdb] 02/02: Replace replication start multi-line log statement

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit b756b77f414707b6ead79e0595702786cfef1c39
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Thu Sep 28 15:17:10 2017 -0400

    Replace replication start multi-line log statement
    
    Use a shorter and more informative single line string:
    
    ```
    Starting replication f9a503bf456a4779fd07901a6dbdb501+continuous+create_target (http://adm:*****@127.0.0.1:15984/a/ -> http://adm:*****@127.0.0.1:15984/bar/) from doc _replicator:my_rep2 worker_procesess:4 worker_batch_size:500 session_id:b4df2a53e33fb6441d82a584a8888f85
    ```
    
    For replication from _replicate endpoint, doc info is skipped and it is clearly
    indicated a `_replicate` replication:
    
    ```
    Starting replication aa0aa3244d7886842189980108178651+continuous+create_target (http://adm:*****@localhost:15984/a/ -> http://adm:*****@localhost:15984/t/) from _replicate endpoint worker_procesess:4 worker_batch_size:500 session_id:6fee11dafc3d8efa6497c67ecadac35d
    ```
    
    Also remove redundant `starting new replication...` log.
---
 .../src/couch_replicator_scheduler_job.erl         | 47 ++++++++++++----------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
index bc74387..e7ce576 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
@@ -95,8 +95,6 @@ start_link(#rep{id = {BaseId, Ext}, source = Src, target = Tgt} = Rep) ->
 
     case gen_server:start_link(ServerName, ?MODULE, Rep, []) of
         {ok, Pid} ->
-            couch_log:notice("starting new replication `~s` at ~p (`~s` -> `~s`)",
-                             [RepChildId, Pid, Source, Target]),
             {ok, Pid};
         {error, Reason} ->
             couch_log:warning("failed to start replication `~s` (`~s` -> `~s`)",
@@ -184,24 +182,7 @@ do_init(#rep{options = Options, id = {BaseId, Ext}, user_ctx=UserCtx} = Rep) ->
     % cancel_replication/1) and then start the replication again, but this is
     % unfortunately not immune to race conditions.
 
-    couch_log:notice("Replication `~p` is using:~n"
-        "~c~p worker processes~n"
-        "~ca worker batch size of ~p~n"
-        "~c~p HTTP connections~n"
-        "~ca connection timeout of ~p milliseconds~n"
-        "~c~p retries per request~n"
-        "~csocket options are: ~s~s",
-        [BaseId ++ Ext, $\t, NumWorkers, $\t, BatchSize, $\t,
-            MaxConns, $\t, get_value(connection_timeout, Options),
-            $\t, get_value(retries, Options),
-            $\t, io_lib:format("~p", [get_value(socket_options, Options)]),
-            case StartSeq of
-            ?LOWEST_SEQ ->
-                "";
-            _ ->
-                io_lib:format("~n~csource start sequence ~p", [$\t, StartSeq])
-            end]),
-
+    log_replication_start(State),
     couch_log:debug("Worker pids are: ~p", [Workers]),
 
     doc_update_triggered(Rep),
@@ -465,7 +446,6 @@ format_status(_Opt, [_PDict, State]) ->
     #rep{
        id = RepId,
        options = Options,
-       type = Type,
        doc_id = DocId,
        db_name = DbName
     } = RepDetails,
@@ -1043,6 +1023,31 @@ replication_start_error(Error) ->
     Error.
 
 
+log_replication_start(#rep_state{rep_details = Rep} = RepState) ->
+    #rep{
+       id = {BaseId, Ext},
+       doc_id = DocId,
+       db_name = DbName,
+       options = Options
+    } = Rep,
+    Id = BaseId ++ Ext,
+    Workers = get_value(worker_processes, Options),
+    BatchSize = get_value(worker_batch_size, Options),
+    #rep_state{
+       source_name = Source,  % credentials already stripped
+       target_name = Target,  % credentials already stripped
+       session_id = Sid
+    } = RepState,
+    From = case DbName of
+        ShardName when is_binary(ShardName) ->
+            io_lib:format("from doc ~s:~s", [mem3:dbname(ShardName), DocId]);
+        _ ->
+            "from _replicate endpoint"
+    end,
+    Msg = "Starting replication ~s (~s -> ~s) ~s worker_procesess:~p"
+        " worker_batch_size:~p session_id:~s",
+    couch_log:notice(Msg, [Id, Source, Target, From, Workers, BatchSize, Sid]).
+
 
 -ifdef(TEST).
 

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.

[couchdb] 01/02: Clean up replicator logs

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2684561e38378ee084d25ce09634c6ef783ea5ac
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Thu Sep 28 13:19:52 2017 -0400

    Clean up replicator logs
    
    Previously replicator was unnesessarily verbose during crashes. This commit
    reduces the vorbosity and make the error messages more helpful.
    
    Most of the replication failures happen in the startup phase when both target
    and source are opened. That's a good place to handle common errors, and there
    were a few already handled (db not found, lack of authorization). This commit
    adds another other common one - inability to resolve endpoint host names. This
    covers cases were there user mistypes the host name or there is a DNS issue.
    
    Also during the startup phase, if an error occurs a stacktrace was logged in
    addition to the whole state of the #rep{} record. Most of the rep record and
    the stack are not that useful compared to how much noise it generates. So
    instead, log only a few relevant fields from #rep{} and only the top 2 stack
    frames. Combined with dns lookup failure this change results in almost a 4x
    (2KB vs 500B) reduction in log noise while providing better debugging
    information.
    
    One last source of excessive log noise the dumping of the full replicator job
    state during crashes. This included both the #rep and the #rep_state records.
    Those have a lot of redundnat information, and since they are dumped as tuples,
    it was hard to use and find the values of each individual field. In this case
    `format_status/2` was improved to dump only a selected set of field along with
    their names. This results in another 3x reduction in log noise.
---
 .../src/couch_replicator_scheduler_job.erl         | 122 ++++++++++++++++++++-
 1 file changed, 116 insertions(+), 6 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
index 08a2488..bc74387 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
@@ -363,10 +363,11 @@ handle_info(timeout, InitArgs) ->
             {stop, {shutdown, max_backoff}, {error, InitArgs}};
         Class:Error ->
             ShutdownReason = {error, replication_start_error(Error)},
+            StackTop2 = lists:sublist(erlang:get_stacktrace(), 2),
             % Shutdown state is a hack as it is not really the state of the
             % gen_server (it failed to initialize, so it doesn't have one).
             % Shutdown state is used to pass extra info about why start failed.
-            ShutdownState = {error, Class, erlang:get_stacktrace(), InitArgs},
+            ShutdownState = {error, Class, StackTop2, InitArgs},
             {stop, {shutdown, ShutdownReason}, ShutdownState}
     end.
 
@@ -399,11 +400,20 @@ terminate({shutdown, max_backoff}, {error, InitArgs}) ->
     couch_replicator_notifier:notify({error, RepId, max_backoff});
 
 terminate({shutdown, {error, Error}}, {error, Class, Stack, InitArgs}) ->
-    #rep{id=RepId} = InitArgs,
+    #rep{
+        id = {BaseId, Ext} = RepId,
+        source = Source0,
+        target = Target0,
+        doc_id = DocId,
+        db_name = DbName
+    } = InitArgs,
+    Source = couch_replicator_api_wrap:db_uri(Source0),
+    Target = couch_replicator_api_wrap:db_uri(Target0),
+    RepIdStr = BaseId ++ Ext,
+    Msg = "~p:~p: Replication ~s failed to start ~p -> ~p doc ~p:~p stack:~p",
+    couch_log:error(Msg, [Class, Error, RepIdStr, Source, Target, DbName,
+        DocId, Stack]),
     couch_stats:increment_counter([couch_replicator, failed_starts]),
-    CleanInitArgs = rep_strip_creds(InitArgs),
-    couch_log:error("~p:~p: Replication failed to start for args ~p: ~p",
-             [Class, Error, CleanInitArgs, Stack]),
     couch_replicator_notifier:notify({error, RepId, Error});
 
 terminate({shutdown, max_backoff}, State) ->
@@ -441,7 +451,38 @@ code_change(_OldVsn, #rep_state{}=State, _Extra) ->
 
 
 format_status(_Opt, [_PDict, State]) ->
-    [{data, [{"State", state_strip_creds(State)}]}].
+    #rep_state{
+       source = Source,
+       target = Target,
+       rep_details = RepDetails,
+       start_seq = StartSeq,
+       source_seq = SourceSeq,
+       committed_seq = CommitedSeq,
+       current_through_seq = ThroughSeq,
+       highest_seq_done = HighestSeqDone,
+       session_id = SessionId
+    } = state_strip_creds(State),
+    #rep{
+       id = RepId,
+       options = Options,
+       type = Type,
+       doc_id = DocId,
+       db_name = DbName
+    } = RepDetails,
+    [
+        {rep_id, RepId},
+        {source, couch_replicator_api_wrap:db_uri(Source)},
+        {target, couch_replicator_api_wrap:db_uri(Target)},
+        {db_name, DbName},
+        {doc_id, DocId},
+        {options, Options},
+        {session_id, SessionId},
+        {start_seq, StartSeq},
+        {source_seq, SourceSeq},
+        {committed_seq, CommitedSeq},
+        {current_through_seq, ThroughSeq},
+        {highest_seq_done, HighestSeqDone}
+    ].
 
 
 startup_jitter() ->
@@ -989,5 +1030,74 @@ replication_start_error({unauthorized, DbUri}) ->
     {unauthorized, <<"unauthorized to access or create database ", DbUri/binary>>};
 replication_start_error({db_not_found, DbUri}) ->
     {db_not_found, <<"could not open ", DbUri/binary>>};
+replication_start_error({http_request_failed, _Method, Url0,
+        {error, {error, {conn_failed, {error, nxdomain}}}}}) ->
+    Url = ?l2b(couch_util:url_strip_password(Url0)),
+    {nxdomain, <<"could not resolve ", Url/binary>>};
+replication_start_error({http_request_failed, Method0, Url0,
+        {error, {code, Code}}}) when is_integer(Code) ->
+    Url = ?l2b(couch_util:url_strip_password(Url0)),
+    Method = ?l2b(Method0),
+    {http_error_code, Code, <<Method/binary, " ", Url/binary>>};
 replication_start_error(Error) ->
     Error.
+
+
+
+-ifdef(TEST).
+
+-include_lib("eunit/include/eunit.hrl").
+
+
+replication_start_error_test() ->
+    ?assertEqual({unauthorized, <<"unauthorized to access or create database"
+        " http://x/y">>}, replication_start_error({unauthorized,
+        <<"http://x/y">>})),
+    ?assertEqual({db_not_found, <<"could not open http://x/y">>},
+        replication_start_error({db_not_found, <<"http://x/y">>})),
+    ?assertEqual({nxdomain,<<"could not resolve http://x/y">>},
+        replication_start_error({http_request_failed, "GET", "http://x/y",
+        {error, {error, {conn_failed, {error, nxdomain}}}}})),
+    ?assertEqual({http_error_code,503,<<"GET http://x/y">>},
+        replication_start_error({http_request_failed, "GET", "http://x/y",
+        {error, {code, 503}}})).
+
+
+scheduler_job_format_status_test() ->
+    Source = <<"http://u:p@h1/d1">>,
+    Target = <<"http://u:p@h2/d2">>,
+    Rep = #rep{
+        id = {"base", "+ext"},
+        source = couch_replicator_docs:parse_rep_db(Source, [], []),
+        target = couch_replicator_docs:parse_rep_db(Target, [], []),
+        options = [{create_target, true}],
+        doc_id = <<"mydoc">>,
+        db_name = <<"mydb">>
+    },
+    State = #rep_state{
+        rep_details = Rep,
+        source = Rep#rep.source,
+        target = Rep#rep.target,
+        session_id = <<"a">>,
+        start_seq = <<"1">>,
+        source_seq = <<"2">>,
+        committed_seq = <<"3">>,
+        current_through_seq = <<"4">>,
+        highest_seq_done = <<"5">>
+    },
+    Format = format_status(opts_ignored, [pdict, State]),
+    ?assertEqual("http://u:*****@h1/d1/", proplists:get_value(source, Format)),
+    ?assertEqual("http://u:*****@h2/d2/", proplists:get_value(target, Format)),
+    ?assertEqual({"base", "+ext"}, proplists:get_value(rep_id, Format)),
+    ?assertEqual([{create_target, true}], proplists:get_value(options, Format)),
+    ?assertEqual(<<"mydoc">>, proplists:get_value(doc_id, Format)),
+    ?assertEqual(<<"mydb">>, proplists:get_value(db_name, Format)),
+    ?assertEqual(<<"a">>, proplists:get_value(session_id, Format)),
+    ?assertEqual(<<"1">>, proplists:get_value(start_seq, Format)),
+    ?assertEqual(<<"2">>, proplists:get_value(source_seq, Format)),
+    ?assertEqual(<<"3">>, proplists:get_value(committed_seq, Format)),
+    ?assertEqual(<<"4">>, proplists:get_value(current_through_seq, Format)),
+    ?assertEqual(<<"5">>, proplists:get_value(highest_seq_done, Format)).
+
+
+-endif.

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.