You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2017/09/28 19:58:51 UTC

[couchdb] 01/02: Clean up replicator logs

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2684561e38378ee084d25ce09634c6ef783ea5ac
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Thu Sep 28 13:19:52 2017 -0400

    Clean up replicator logs
    
    Previously replicator was unnesessarily verbose during crashes. This commit
    reduces the vorbosity and make the error messages more helpful.
    
    Most of the replication failures happen in the startup phase when both target
    and source are opened. That's a good place to handle common errors, and there
    were a few already handled (db not found, lack of authorization). This commit
    adds another other common one - inability to resolve endpoint host names. This
    covers cases were there user mistypes the host name or there is a DNS issue.
    
    Also during the startup phase, if an error occurs a stacktrace was logged in
    addition to the whole state of the #rep{} record. Most of the rep record and
    the stack are not that useful compared to how much noise it generates. So
    instead, log only a few relevant fields from #rep{} and only the top 2 stack
    frames. Combined with dns lookup failure this change results in almost a 4x
    (2KB vs 500B) reduction in log noise while providing better debugging
    information.
    
    One last source of excessive log noise the dumping of the full replicator job
    state during crashes. This included both the #rep and the #rep_state records.
    Those have a lot of redundnat information, and since they are dumped as tuples,
    it was hard to use and find the values of each individual field. In this case
    `format_status/2` was improved to dump only a selected set of field along with
    their names. This results in another 3x reduction in log noise.
---
 .../src/couch_replicator_scheduler_job.erl         | 122 ++++++++++++++++++++-
 1 file changed, 116 insertions(+), 6 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
index 08a2488..bc74387 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
@@ -363,10 +363,11 @@ handle_info(timeout, InitArgs) ->
             {stop, {shutdown, max_backoff}, {error, InitArgs}};
         Class:Error ->
             ShutdownReason = {error, replication_start_error(Error)},
+            StackTop2 = lists:sublist(erlang:get_stacktrace(), 2),
             % Shutdown state is a hack as it is not really the state of the
             % gen_server (it failed to initialize, so it doesn't have one).
             % Shutdown state is used to pass extra info about why start failed.
-            ShutdownState = {error, Class, erlang:get_stacktrace(), InitArgs},
+            ShutdownState = {error, Class, StackTop2, InitArgs},
             {stop, {shutdown, ShutdownReason}, ShutdownState}
     end.
 
@@ -399,11 +400,20 @@ terminate({shutdown, max_backoff}, {error, InitArgs}) ->
     couch_replicator_notifier:notify({error, RepId, max_backoff});
 
 terminate({shutdown, {error, Error}}, {error, Class, Stack, InitArgs}) ->
-    #rep{id=RepId} = InitArgs,
+    #rep{
+        id = {BaseId, Ext} = RepId,
+        source = Source0,
+        target = Target0,
+        doc_id = DocId,
+        db_name = DbName
+    } = InitArgs,
+    Source = couch_replicator_api_wrap:db_uri(Source0),
+    Target = couch_replicator_api_wrap:db_uri(Target0),
+    RepIdStr = BaseId ++ Ext,
+    Msg = "~p:~p: Replication ~s failed to start ~p -> ~p doc ~p:~p stack:~p",
+    couch_log:error(Msg, [Class, Error, RepIdStr, Source, Target, DbName,
+        DocId, Stack]),
     couch_stats:increment_counter([couch_replicator, failed_starts]),
-    CleanInitArgs = rep_strip_creds(InitArgs),
-    couch_log:error("~p:~p: Replication failed to start for args ~p: ~p",
-             [Class, Error, CleanInitArgs, Stack]),
     couch_replicator_notifier:notify({error, RepId, Error});
 
 terminate({shutdown, max_backoff}, State) ->
@@ -441,7 +451,38 @@ code_change(_OldVsn, #rep_state{}=State, _Extra) ->
 
 
 format_status(_Opt, [_PDict, State]) ->
-    [{data, [{"State", state_strip_creds(State)}]}].
+    #rep_state{
+       source = Source,
+       target = Target,
+       rep_details = RepDetails,
+       start_seq = StartSeq,
+       source_seq = SourceSeq,
+       committed_seq = CommitedSeq,
+       current_through_seq = ThroughSeq,
+       highest_seq_done = HighestSeqDone,
+       session_id = SessionId
+    } = state_strip_creds(State),
+    #rep{
+       id = RepId,
+       options = Options,
+       type = Type,
+       doc_id = DocId,
+       db_name = DbName
+    } = RepDetails,
+    [
+        {rep_id, RepId},
+        {source, couch_replicator_api_wrap:db_uri(Source)},
+        {target, couch_replicator_api_wrap:db_uri(Target)},
+        {db_name, DbName},
+        {doc_id, DocId},
+        {options, Options},
+        {session_id, SessionId},
+        {start_seq, StartSeq},
+        {source_seq, SourceSeq},
+        {committed_seq, CommitedSeq},
+        {current_through_seq, ThroughSeq},
+        {highest_seq_done, HighestSeqDone}
+    ].
 
 
 startup_jitter() ->
@@ -989,5 +1030,74 @@ replication_start_error({unauthorized, DbUri}) ->
     {unauthorized, <<"unauthorized to access or create database ", DbUri/binary>>};
 replication_start_error({db_not_found, DbUri}) ->
     {db_not_found, <<"could not open ", DbUri/binary>>};
+replication_start_error({http_request_failed, _Method, Url0,
+        {error, {error, {conn_failed, {error, nxdomain}}}}}) ->
+    Url = ?l2b(couch_util:url_strip_password(Url0)),
+    {nxdomain, <<"could not resolve ", Url/binary>>};
+replication_start_error({http_request_failed, Method0, Url0,
+        {error, {code, Code}}}) when is_integer(Code) ->
+    Url = ?l2b(couch_util:url_strip_password(Url0)),
+    Method = ?l2b(Method0),
+    {http_error_code, Code, <<Method/binary, " ", Url/binary>>};
 replication_start_error(Error) ->
     Error.
+
+
+
+-ifdef(TEST).
+
+-include_lib("eunit/include/eunit.hrl").
+
+
+replication_start_error_test() ->
+    ?assertEqual({unauthorized, <<"unauthorized to access or create database"
+        " http://x/y">>}, replication_start_error({unauthorized,
+        <<"http://x/y">>})),
+    ?assertEqual({db_not_found, <<"could not open http://x/y">>},
+        replication_start_error({db_not_found, <<"http://x/y">>})),
+    ?assertEqual({nxdomain,<<"could not resolve http://x/y">>},
+        replication_start_error({http_request_failed, "GET", "http://x/y",
+        {error, {error, {conn_failed, {error, nxdomain}}}}})),
+    ?assertEqual({http_error_code,503,<<"GET http://x/y">>},
+        replication_start_error({http_request_failed, "GET", "http://x/y",
+        {error, {code, 503}}})).
+
+
+scheduler_job_format_status_test() ->
+    Source = <<"http://u:p@h1/d1">>,
+    Target = <<"http://u:p@h2/d2">>,
+    Rep = #rep{
+        id = {"base", "+ext"},
+        source = couch_replicator_docs:parse_rep_db(Source, [], []),
+        target = couch_replicator_docs:parse_rep_db(Target, [], []),
+        options = [{create_target, true}],
+        doc_id = <<"mydoc">>,
+        db_name = <<"mydb">>
+    },
+    State = #rep_state{
+        rep_details = Rep,
+        source = Rep#rep.source,
+        target = Rep#rep.target,
+        session_id = <<"a">>,
+        start_seq = <<"1">>,
+        source_seq = <<"2">>,
+        committed_seq = <<"3">>,
+        current_through_seq = <<"4">>,
+        highest_seq_done = <<"5">>
+    },
+    Format = format_status(opts_ignored, [pdict, State]),
+    ?assertEqual("http://u:*****@h1/d1/", proplists:get_value(source, Format)),
+    ?assertEqual("http://u:*****@h2/d2/", proplists:get_value(target, Format)),
+    ?assertEqual({"base", "+ext"}, proplists:get_value(rep_id, Format)),
+    ?assertEqual([{create_target, true}], proplists:get_value(options, Format)),
+    ?assertEqual(<<"mydoc">>, proplists:get_value(doc_id, Format)),
+    ?assertEqual(<<"mydb">>, proplists:get_value(db_name, Format)),
+    ?assertEqual(<<"a">>, proplists:get_value(session_id, Format)),
+    ?assertEqual(<<"1">>, proplists:get_value(start_seq, Format)),
+    ?assertEqual(<<"2">>, proplists:get_value(source_seq, Format)),
+    ?assertEqual(<<"3">>, proplists:get_value(committed_seq, Format)),
+    ?assertEqual(<<"4">>, proplists:get_value(current_through_seq, Format)),
+    ?assertEqual(<<"5">>, proplists:get_value(highest_seq_done, Format)).
+
+
+-endif.

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.