You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2020/01/07 09:20:41 UTC

[GitHub] [couchdb] willholley opened a new pull request #2410: Mango metrics

willholley opened a new pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410
 
 
   <!-- Thank you for your contribution!
   
        Please file this form by replacing the Markdown comments
        with your text. If a section needs no action - remove it.
   
        Also remember, that CouchDB uses the Review-Then-Commit (RTC) model
        of code collaboration. Positive feedback is represented +1 from committers
        and negative is a -1. The -1 also means veto, and needs to be addressed
        to proceed. Once there are no objections, the PR can be merged by a
        CouchDB committer.
   
        See: http://couchdb.apache.org/bylaws.html#decisions for more info. -->
   
   ## Overview
   
   Improve instrumentation of Mango with the following metrics:
   
    * `mango.query_invalid_index`: number of mango queries that generated an invalid index warning
    * `mango, docs_examined`: number of documents examined by mango queries (the same value returned by execution_stats for individual queries)
    * `mango,quorum_docs_examined`: number of documents examined by mango queries, using cluster quorum
    * `mango.results_returned`: number of rows returned by mango queries
    * `mango.query_time`: histogram of the length of time processing mango queries
    * `mango.too_many_docs_scanned`: number of mango queries that generated an index scan warning
   
   ## Testing recommendations
   
   Make some Mango queries and call `_node/_local/_stats/mango` to view the metrics.
   
   ## Related Issues or Pull Requests
   
    * Prior art: https://github.com/apache/couchdb/issues/1913
    * Documentation PR: https://github.com/apache/couchdb-documentation/pull/469
   
   ## Checklist
   
   - [ ] Code is written and works correctly
   - [ ] Changes are covered by tests
   - [ ] Any new configurable parameters are documented in `rel/overlay/etc/default.ini`
   - [ ] A PR for documentation changes has been made in https://github.com/apache/couchdb-documentation
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364258212
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,44 +146,87 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
+
+
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
+
+
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
         _ ->
-            ok
-    end,
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get("mango", "index_scan_warning_threshold", 10),
+    index_scan_warning_int(Threshold, Ratio).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+index_scan_ratio(DocsScanned, 0) ->
+    DocsScanned;
+index_scan_ratio(DocsScanned, ResultCount) ->
+    DocsScanned / ResultCount.
 
-maybe_add_warning_int(Warning, UserFun, UserAcc) ->
-    couch_stats:increment_counter([mango, unindexed_queries]),
-    Arg = {add_key, warning, Warning},
+
+index_scan_warning_int(Threshold, Ratio) when is_integer(Threshold), Threshold > 0, Ratio > Threshold ->
 
 Review comment:
   Rather make this a case statement in the `index_scan_warning`
   
   ```erlang
   case Threshold > 0 andalso Ratio > Threshold of
     true -> ...error;
     false -> []
   end.
   ```
   
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] willholley commented on issue #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
willholley commented on issue #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#issuecomment-572239115
 
 
   thanks for the review @garrensmith. I think the latest push addresses your comments.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364259555
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,44 +146,87 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
+
+
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
+
+
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
         _ ->
-            ok
-    end,
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get("mango", "index_scan_warning_threshold", 10),
+    index_scan_warning_int(Threshold, Ratio).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+index_scan_ratio(DocsScanned, 0) ->
+    DocsScanned;
+index_scan_ratio(DocsScanned, ResultCount) ->
+    DocsScanned / ResultCount.
 
-maybe_add_warning_int(Warning, UserFun, UserAcc) ->
-    couch_stats:increment_counter([mango, unindexed_queries]),
-    Arg = {add_key, warning, Warning},
+
+index_scan_warning_int(Threshold, Ratio) when is_integer(Threshold), Threshold > 0, Ratio > Threshold ->
+    couch_stats:increment_counter([mango, too_many_docs_scanned]),
+    Reason = <<"The number of documents examined is high in proportion to the number of results returned. Consider adding a more specific index to improve this.">>,
+    [Reason];
+index_scan_warning_int(_, _) ->
+    [].
+
+
+maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, Stats, UserAcc) ->
+    W0 = invalid_index_warning(Index, Opts),
+    W1 = no_index_warning(Index),
+    W2 = index_scan_warning(Stats),
+    Warnings = lists:append([W0, W1, W2]),
+    maybe_add_warning_int(Warnings, UserFun, UserAcc).
+
+
+maybe_add_warning_int([], _, UserAcc) ->
+   UserAcc;
+maybe_add_warning_int(Warnings, UserFun, UserAcc) ->
+    WarningStr = lists:join(<<"\n">>, Warnings),
+    Arg = {add_key, warning, WarningStr},
     {_Go, UserAcc0} = UserFun(Arg, UserAcc),
 
 Review comment:
   This should be UserAcc1

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364628967
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,46 +146,86 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
-        _ ->
-            ok
-    end,
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
 
 Review comment:
   This is fine, but you can, if you want, also write this as:
   
   ```erlang
      filter_indexes([Index], DesignId) /= [] -> []; true ->
         couch_stats:increment_counter([mango, query_invalid_index]),
               Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
                   [ddoc_name(DesignId)]),
               [Reason];
           end,
    ```
   Sometimes its nice to use an if statement like that when there really is only 1 branch

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364254180
 
 

 ##########
 File path: src/couch/priv/stats_descriptions.cfg
 ##########
 @@ -302,3 +302,35 @@
     {type, counter},
     {desc, <<"number of mango queries that could not use an index">>}
 ]}.
+{[mango, query_invalid_index], [
+    {type, counter},
+    {desc, <<"number of mango queries that generated an invalid index warning">>}
+]}.
+{[mango, too_many_docs_scanned], [
+    {type, counter},
+    {desc, <<"number of mango queries that generated an index scan warning">>}
+}}.
+{[mango, docs_examined], [
+    {type, counter},
+    {desc, <<"number of documents examined by mango queries coordinated by this node">>}
+]}.
+{[mango, quorum_docs_examined], [
+    {type, counter},
+    {desc, <<"number of documents examined by mango queries, using cluster quorum">>}
+]}.
+{[mango, results_returned], [
+    {type, counter},
+    {desc, <<"number of rows returned by mango queries">>}
+]}.
+{[mango, query_time], [
+    {type, histogram},
+    {desc, <<"length of time processing a mango query">>}
+]}.
+{[mango, too_many_docs_scanned], [
 
 Review comment:
   Is this a duplicate of the one on line 309?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] wohali commented on issue #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
wohali commented on issue #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#issuecomment-575287193
 
 
   Looks nice. If you want this in 3.0, now's the time to merge it.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364628178
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,46 +146,86 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
-        _ ->
-            ok
-    end,
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
+        _ ->
+            []
+    end;
 
 Review comment:
   Sorry, super nitpicky, but can you add a space between each function here. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] willholley merged pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
willholley merged pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410
 
 
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364629799
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,46 +146,86 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
-        _ ->
-            ok
-    end,
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
+
+
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = calculate_index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get_integer("mango", "index_scan_warning_threshold", 10),
+    case Threshold > 0 andalso Ratio > Threshold of
+        true ->
+            couch_stats:increment_counter([mango, too_many_docs_scanned]),
+            Reason = <<"The number of documents examined is high in proportion to the number of results returned. Consider adding a more specific index to improve this.">>,
+            [Reason];
+        false -> []
+    end.
 
-maybe_add_warning_int(Warning, UserFun, UserAcc) ->
-    couch_stats:increment_counter([mango, unindexed_queries]),
-    Arg = {add_key, warning, Warning},
-    {_Go, UserAcc0} = UserFun(Arg, UserAcc),
-    UserAcc0.
+
+calculate_index_scan_ratio(DocsScanned, 0) ->
+    DocsScanned;
+calculate_index_scan_ratio(DocsScanned, ResultCount) ->
+    DocsScanned / ResultCount.
+
+
+maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, Stats, UserAcc) ->
 
 Review comment:
   Can you move this to the top so it follows emilio https://github.com/cloudant-labs/emilio/blob/master/priv/documentation/420/description.md

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364259879
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,44 +146,87 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
+
+
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
+
+
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
         _ ->
-            ok
-    end,
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get("mango", "index_scan_warning_threshold", 10),
+    index_scan_warning_int(Threshold, Ratio).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+index_scan_ratio(DocsScanned, 0) ->
+    DocsScanned;
+index_scan_ratio(DocsScanned, ResultCount) ->
+    DocsScanned / ResultCount.
 
-maybe_add_warning_int(Warning, UserFun, UserAcc) ->
-    couch_stats:increment_counter([mango, unindexed_queries]),
-    Arg = {add_key, warning, Warning},
+
+index_scan_warning_int(Threshold, Ratio) when is_integer(Threshold), Threshold > 0, Ratio > Threshold ->
+    couch_stats:increment_counter([mango, too_many_docs_scanned]),
+    Reason = <<"The number of documents examined is high in proportion to the number of results returned. Consider adding a more specific index to improve this.">>,
+    [Reason];
+index_scan_warning_int(_, _) ->
+    [].
+
+
+maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, Stats, UserAcc) ->
+    W0 = invalid_index_warning(Index, Opts),
+    W1 = no_index_warning(Index),
+    W2 = index_scan_warning(Stats),
+    Warnings = lists:append([W0, W1, W2]),
+    maybe_add_warning_int(Warnings, UserFun, UserAcc).
 
 Review comment:
   I'm kind of inclined to just make this a case statement rather than two functions. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364260299
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,44 +146,87 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
+
+
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
+
+
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
         _ ->
-            ok
-    end,
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get("mango", "index_scan_warning_threshold", 10),
+    index_scan_warning_int(Threshold, Ratio).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+index_scan_ratio(DocsScanned, 0) ->
+    DocsScanned;
+index_scan_ratio(DocsScanned, ResultCount) ->
+    DocsScanned / ResultCount.
 
-maybe_add_warning_int(Warning, UserFun, UserAcc) ->
-    couch_stats:increment_counter([mango, unindexed_queries]),
-    Arg = {add_key, warning, Warning},
+
+index_scan_warning_int(Threshold, Ratio) when is_integer(Threshold), Threshold > 0, Ratio > Threshold ->
+    couch_stats:increment_counter([mango, too_many_docs_scanned]),
+    Reason = <<"The number of documents examined is high in proportion to the number of results returned. Consider adding a more specific index to improve this.">>,
+    [Reason];
+index_scan_warning_int(_, _) ->
+    [].
+
+
+maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, Stats, UserAcc) ->
+    W0 = invalid_index_warning(Index, Opts),
+    W1 = no_index_warning(Index),
+    W2 = index_scan_warning(Stats),
+    Warnings = lists:append([W0, W1, W2]),
+    maybe_add_warning_int(Warnings, UserFun, UserAcc).
+
+
+maybe_add_warning_int([], _, UserAcc) ->
 
 Review comment:
   Can you move this function above all the functions that it calls. That makes it easier to follow.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364256652
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,44 +146,87 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
+
+
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
+
+
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
         _ ->
-            ok
-    end,
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get("mango", "index_scan_warning_threshold", 10),
+    index_scan_warning_int(Threshold, Ratio).
 
 
-maybe_add_warning_int(ok, _, UserAcc) ->
-   UserAcc;
+index_scan_ratio(DocsScanned, 0) ->
 
 Review comment:
   can you change this to `calculate_index_scan_ratio`

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364257833
 
 

 ##########
 File path: src/mango/src/mango_cursor.erl
 ##########
 @@ -146,44 +146,87 @@ group_indexes_by_type(Indexes) ->
     end, ?CURSOR_MODULES).
 
 
-maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, UserAcc) ->
-    NoIndexWarning = case Index#idx.type of
-        <<"special">> ->
-            <<"no matching index found, create an index to optimize query time">>;
+% warn if the _all_docs index was used to fulfil a query
+no_index_warning(#idx{type = Type}) when Type =:= <<"special">> ->
+    couch_stats:increment_counter([mango, unindexed_queries]),
+    [<<"No matching index found, create an index to optimize query time.">>];
+no_index_warning(_) ->
+    [].
+
+
+% warn if user specified an index which doesn't exist or isn't valid
+% for the selector.
+% In this scenario, Mango will ignore the index hint and auto-select an index.
+invalid_index_warning(Index, Opts) ->
+    UseIndex = lists:keyfind(use_index, 1, Opts),
+    invalid_index_warning_int(Index, UseIndex).
+
+
+invalid_index_warning_int(Index, {use_index, [DesignId]}) ->
+    case filter_indexes([Index], DesignId) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s was not used because it does not contain a valid index for this query.",
+                [ddoc_name(DesignId)]),
+            [Reason];
         _ ->
-            ok
-    end,
+            []
+    end;
+invalid_index_warning_int(Index, {use_index, [DesignId, ViewName]}) ->
+    case filter_indexes([Index], DesignId, ViewName) of
+        [] ->
+            couch_stats:increment_counter([mango, query_invalid_index]),
+            Reason = fmt("_design/~s, ~s was not used because it is not a valid index for this query.",
+                [ddoc_name(DesignId), ViewName]),
+            [Reason];
+        _ ->
+            []
+    end;
+invalid_index_warning_int(_, _) ->
+    [].
 
-    UseIndexInvalidWarning = case lists:keyfind(use_index, 1, Opts) of
-        {use_index, []} ->
-            NoIndexWarning;
-        {use_index, [DesignId]} ->
-            case filter_indexes([Index], DesignId) of
-                [] ->
-                    fmt("_design/~s was not used because it does not contain a valid index for this query.", 
-                        [ddoc_name(DesignId)]);
-                _ ->
-                    NoIndexWarning
-            end;
-        {use_index, [DesignId, ViewName]} ->
-            case filter_indexes([Index], DesignId, ViewName) of
-                [] ->
-                    fmt("_design/~s, ~s was not used because it is not a valid index for this query.", 
-                        [ddoc_name(DesignId), ViewName]);
-                _ ->
-                    NoIndexWarning
-            end
-    end,
 
-    maybe_add_warning_int(UseIndexInvalidWarning, UserFun, UserAcc).
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    DocsScanned = Docs + DocsQuorum,
+    Ratio = index_scan_ratio(DocsScanned, ResultCount),
+    Threshold = config:get("mango", "index_scan_warning_threshold", 10),
 
 Review comment:
   There is a `config:get_integer` you can use here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [couchdb] garrensmith commented on a change in pull request #2410: Mango metrics

Posted by GitBox <gi...@apache.org>.
garrensmith commented on a change in pull request #2410: Mango metrics
URL: https://github.com/apache/couchdb/pull/2410#discussion_r364261490
 
 

 ##########
 File path: src/mango/src/mango_execution_stats.erl
 ##########
 @@ -82,9 +83,11 @@ log_end(Stats) ->
 
 
 maybe_add_stats(Opts, UserFun, Stats, UserAcc) ->
+    Stats0 = log_end(Stats),
 
 Review comment:
   A variable with a 0 is lower that Stats. So you could make the first variable of Stats as Stats0 eg:
   
   ```
   maybe_add_stats(Opts, UserFun, Stats0, UserAcc) ->
        Stats = log_end(Stats0), 
   ```
   Is how its typically done in the CouchDB codebase

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services