You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wi...@apache.org on 2020/01/07 09:19:17 UTC

[couchdb] 01/01: Warn on mango index scan

This is an automated email from the ASF dual-hosted git repository.

willholley pushed a commit to branch mango_metrics
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 051f09e960433eb19eb696a72ad11e2415b7e345
Author: Will Holley <wi...@gmail.com>
AuthorDate: Mon Jan 6 13:22:07 2020 +0000

    Warn on mango index scan
    
    Adds a warning to the _find endpoint if the ratio of docs scanned:
    results returned is higher than a configurable threshold (default
    10). This warning was previously generated in Fauxton; moving
    it to the server side allows us to expose it via _stats as well.
---
 rel/overlay/etc/default.ini           | 20 ++++++++++++--------
 src/couch/priv/stats_descriptions.cfg |  4 ++++
 src/mango/src/mango_cursor.erl        | 34 +++++++++++++++++++++++++++++++---
 src/mango/src/mango_cursor_text.erl   |  2 +-
 src/mango/src/mango_cursor_view.erl   |  2 +-
 5 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index a1df080..ab67ec8 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -167,8 +167,8 @@ enable_xframe_options = false
 ; CouchDB can optionally enforce a maximum uri length;
 ; max_uri_length = 8000
 ; changes_timeout = 60000
-; config_whitelist = 
-; max_uri_length = 
+; config_whitelist =
+; max_uri_length =
 ; rewrite_limit = 100
 ; x_forwarded_host = X-Forwarded-Host
 ; x_forwarded_proto = X-Forwarded-Proto
@@ -177,7 +177,7 @@ enable_xframe_options = false
 max_http_request_size = 4294967296 ; 4GB
 
 ; [httpd_design_handlers]
-; _view = 
+; _view =
 
 ; [ioq]
 ; concurrency = 10
@@ -191,7 +191,7 @@ port = 6984
 
 ; [chttpd_auth_cache]
 ; max_lifetime = 600000
-; max_objects = 
+; max_objects =
 ; max_size = 104857600
 
 ; [mem3]
@@ -202,7 +202,7 @@ port = 6984
 
 ; [fabric]
 ; all_docs_concurrency = 10
-; changes_duration = 
+; changes_duration =
 ; shard_timeout_factor = 2
 ; uuid_prefix_len = 7
 ; request_timeout = 60000
@@ -240,7 +240,7 @@ iterations = 10 ; iterations for password hashing
 ; proxy_use_secret = false
 ; comma-separated list of public fields, 404 if empty
 ; public_fields =
-; secret = 
+; secret =
 ; users_db_public = false
 ; cookie_domain = example.com
 
@@ -313,12 +313,16 @@ os_process_limit = 100
 ;index_all_disabled = false
 ; Default limit value for mango _find queries.
 ;default_limit = 25
+; Ratio between documents scanned and results matched that will
+; generate a warning in the _find response. Setting this to 0 disables
+; the warning.
+;index_scan_warning_threshold = 10
 
 [indexers]
 couch_mrview = true
 
 [feature_flags]
-; This enables any database to be created as a partitioned databases (except system db's). 
+; This enables any database to be created as a partitioned database (except system db's).
 ; Setting this to false will stop the creation of paritioned databases.
 ; paritioned||allowed* = true will scope the creation of partitioned databases
 ; to databases with 'allowed' prefix.
@@ -527,7 +531,7 @@ min_priority = 2.0
 ; The default number of results returned from a search on a partition
 ; of a database.
 ; limit_partitions = 2000
- 
+
 ; The maximum number of results that can be returned from a global
 ; search query (or any search query on a database without user-defined
 ; partitions). Attempts to set ?limit=N higher than this value will
diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index d333ea4..f4ff990 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -322,3 +322,7 @@
     {type, histogram},
     {desc, <<"length of time processing a mango query">>}
 ]}.
+{[mango, too_many_docs_scanned], [
+    {type, counter},
+    {desc, <<"number of mango queries that generated an index scan warning">>}
+]}.
diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl
index 6161270..d0b44c1 100644
--- a/src/mango/src/mango_cursor.erl
+++ b/src/mango/src/mango_cursor.erl
@@ -19,7 +19,7 @@
     execute/3,
     maybe_filter_indexes_by_ddoc/2,
     remove_indexes_with_partial_filter_selector/1,
-    maybe_add_warning/3
+    maybe_add_warning/4
 ]).
 
 
@@ -186,16 +186,44 @@ invalid_index_warning_int(_, _) ->
     [].
 
 
+% warn if a large number of documents needed to be scanned per result
+% returned, implying a lot of in-memory filtering. config:get/3 returns
+% configured values as strings, so read the threshold as an integer.
+index_scan_warning(#execution_stats {
+                    totalDocsExamined = Docs,
+                    totalQuorumDocsExamined = DocsQuorum,
+                    resultsReturned = ResultCount
+                }) ->
+    % Docs and DocsQuorum are mutually exclusive so it's safe to sum them
+    Ratio = index_scan_ratio(Docs + DocsQuorum, ResultCount),
+    Threshold = config:get_integer("mango", "index_scan_warning_threshold", 10),
+    index_scan_warning_int(Threshold, Ratio).
+
+
+% ratio of documents scanned to results returned; when nothing was
+% returned the scan count itself is used, avoiding a division by zero
+index_scan_ratio(Scanned, 0) -> Scanned;
+index_scan_ratio(Scanned, Results) -> Scanned / Results.
+
+
+% only a positive integer threshold can trigger the warning; 0 disables it
+index_scan_warning_int(Limit, Ratio) when is_integer(Limit), Limit > 0, Ratio > Limit ->
+    couch_stats:increment_counter([mango, too_many_docs_scanned]),
+    [<<"the number of documents examined is high in proportion to the number of results returned. Consider adding a more specific index to improve this.">>];
+index_scan_warning_int(_Limit, _Ratio) ->
+    [].
+
+
 maybe_add_warning(UserFun, #cursor{index = Index, opts = Opts}, Stats, UserAcc) ->
     W0 = invalid_index_warning(Index, Opts),
     W1 = no_index_warning(Index),
-    Warnings = lists:append([W0, W1]),
+    W2 = index_scan_warning(Stats),
+    Warnings = lists:append([W0, W1, W2]),
     maybe_add_warning_int(Warnings, UserFun, UserAcc).
 
 
 maybe_add_warning_int([], _, UserAcc) ->
    UserAcc;
-
 maybe_add_warning_int(Warnings, UserFun, UserAcc) ->
     WarningStr = lists:join(<<"\n">>, Warnings),
     Arg = {add_key, warning, WarningStr},
diff --git a/src/mango/src/mango_cursor_text.erl b/src/mango/src/mango_cursor_text.erl
index 8938f35..9323bcf 100644
--- a/src/mango/src/mango_cursor_text.erl
+++ b/src/mango/src/mango_cursor_text.erl
@@ -126,7 +126,7 @@ execute(Cursor, UserFun, UserAcc) ->
             Arg = {add_key, bookmark, JsonBM},
             {_Go, FinalUserAcc} = UserFun(Arg, LastUserAcc),
             FinalUserAcc0 = mango_execution_stats:maybe_add_stats(Opts, UserFun, Stats0, FinalUserAcc),
-            FinalUserAcc1 = mango_cursor:maybe_add_warning(UserFun, Cursor, FinalUserAcc0),
+            FinalUserAcc1 = mango_cursor:maybe_add_warning(UserFun, Cursor, Stats0, FinalUserAcc0),
             {ok, FinalUserAcc1}
     end.
 
diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl
index 9f5e6ec..ceda896 100644
--- a/src/mango/src/mango_cursor_view.erl
+++ b/src/mango/src/mango_cursor_view.erl
@@ -145,7 +145,7 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu
                     {_Go, FinalUserAcc} = UserFun(Arg, LastCursor#cursor.user_acc),
                     Stats0 = LastCursor#cursor.execution_stats,
                     FinalUserAcc0 = mango_execution_stats:maybe_add_stats(Opts, UserFun, Stats0, FinalUserAcc),
-                    FinalUserAcc1 = mango_cursor:maybe_add_warning(UserFun, Cursor, FinalUserAcc0),
+                    FinalUserAcc1 = mango_cursor:maybe_add_warning(UserFun, Cursor, Stats0, FinalUserAcc0),
                     {ok, FinalUserAcc1};
                 {error, Reason} ->
                     {error, Reason}