You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ga...@apache.org on 2018/06/20 19:12:30 UTC

[couchdb] branch master updated: Add constant index fields to sort based on the selector (#1376)

This is an automated email from the ASF dual-hosted git repository.

garren pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 5b5c8a1  Add constant index fields to sort based on the selector (#1376)
5b5c8a1 is described below

commit 5b5c8a1dcb196eb461dcab17a52c3c27998c5372
Author: garren smith <ga...@gmail.com>
AuthorDate: Wed Jun 20 21:12:27 2018 +0200

    Add constant index fields to sort based on the selector (#1376)
    
    This moves the sort check into the is_usable function for all indexes.
    For map/reduce indexes it can add any constant fields e.g {a: {"$eq": 4}
    to the prefix of the sort because it won't affect the actual sort but
    will increase the chance that an index is selected. This is a user
    experience fix to help a user if they don't add all the columns for an
    index to the sort fields.
---
 src/mango/src/mango_cursor.erl       |  18 +--
 src/mango/src/mango_idx.erl          |  36 +-----
 src/mango/src/mango_idx_special.erl  |  13 +-
 src/mango/src/mango_idx_view.erl     |  30 ++++-
 src/mango/src/mango_selector.erl     | 113 +++++++++++++++++-
 src/mango/test/02-basic-find-test.py |   7 ++
 src/mango/test/18-json-sort.py       | 222 +++++++++++++++++++++++++++++++++++
 7 files changed, 392 insertions(+), 47 deletions(-)

diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl
index 5108d36..5d2ea71 100644
--- a/src/mango/src/mango_cursor.erl
+++ b/src/mango/src/mango_cursor.erl
@@ -48,18 +48,12 @@
 create(Db, Selector0, Opts) ->
     Selector = mango_selector:normalize(Selector0),
     UsableIndexes = mango_idx:get_usable_indexes(Db, Selector, Opts),
-    case length(UsableIndexes) of
-        0 ->
-            AllDocs = mango_idx:special(Db),
-            create_cursor(Db, AllDocs, Selector, Opts);
-        _ ->
-            case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of
-                [] ->
-                    % use_index doesn't match a valid index - fall back to a valid one
-                    create_cursor(Db, UsableIndexes, Selector, Opts);
-                UserSpecifiedIndex ->
-                    create_cursor(Db, UserSpecifiedIndex, Selector, Opts)
-            end
+    case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of
+        [] ->
+            % use_index doesn't match a valid index - fall back to a valid one
+            create_cursor(Db, UsableIndexes, Selector, Opts);
+        UserSpecifiedIndex ->
+            create_cursor(Db, UserSpecifiedIndex, Selector, Opts)
     end.
 
 
diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl
index ea5949c..8af92b9 100644
--- a/src/mango/src/mango_idx.erl
+++ b/src/mango/src/mango_idx.erl
@@ -66,13 +66,12 @@ get_usable_indexes(Db, Selector, Opts) ->
 
     SortFields = get_sort_fields(Opts),
     UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end,
-    UsableIndexes1 = lists:filter(UsableFilter, UsableIndexes0),
 
-    case maybe_filter_by_sort_fields(UsableIndexes1, SortFields) of
-        {ok, SortIndexes} -> 
-            SortIndexes;
-        {error, no_usable_index} -> 
-            ?MANGO_ERROR({no_usable_index, missing_sort_index})
+    case lists:filter(UsableFilter, UsableIndexes0) of
+        [] -> 
+            ?MANGO_ERROR({no_usable_index, missing_sort_index});
+        UsableIndexes -> 
+            UsableIndexes
     end.
 
 
@@ -100,31 +99,6 @@ get_sort_fields(Opts) ->
     end.
 
 
-maybe_filter_by_sort_fields(Indexes, []) ->
-    {ok, Indexes};
-
-maybe_filter_by_sort_fields(Indexes, SortFields) ->
-    FilterFun = fun(Idx) ->
-        Cols = mango_idx:columns(Idx),
-        case {mango_idx:type(Idx), Cols} of
-            {_, all_fields} ->
-                true;
-            {<<"text">>, _} ->
-                sets:is_subset(sets:from_list(SortFields), sets:from_list(Cols));
-            {<<"json">>, _} ->
-                lists:prefix(SortFields, Cols);
-            {<<"special">>, _} ->
-                lists:prefix(SortFields, Cols)
-        end
-    end,
-    case lists:filter(FilterFun, Indexes) of
-        [] ->
-            {error, no_usable_index};
-        FilteredIndexes ->
-            {ok, FilteredIndexes}
-    end.
-
-
 new(Db, Opts) ->
     Def = get_idx_def(Opts),
     Type = get_idx_type(Opts),
diff --git a/src/mango/src/mango_idx_special.erl b/src/mango/src/mango_idx_special.erl
index 12da1cb..ac6efc7 100644
--- a/src/mango/src/mango_idx_special.erl
+++ b/src/mango/src/mango_idx_special.erl
@@ -63,9 +63,11 @@ columns(#idx{def=all_docs}) ->
     [<<"_id">>].
 
 
-is_usable(#idx{def=all_docs}, Selector, _) ->
+is_usable(#idx{def=all_docs}, _Selector, []) ->
+    true;
+is_usable(#idx{def=all_docs} = Idx, Selector, SortFields) ->
     Fields = mango_idx_view:indexable_fields(Selector),
-    lists:member(<<"_id">>, Fields).
+    lists:member(<<"_id">>, Fields) and can_use_sort(Idx, SortFields, Selector).
 
 
 start_key([{'$gt', Key, _, _}]) ->
@@ -96,3 +98,10 @@ end_key([{_, _, '$lte', Key}]) ->
 end_key([{'$eq', Key, '$eq', Key}]) ->
     false = mango_json:special(Key),
     Key.
+
+
+can_use_sort(_Idx, [], _Selector) ->
+    true;
+can_use_sort(Idx, SortFields, _Selector) ->
+    Cols = columns(Idx),
+    lists:prefix(SortFields, Cols).
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index 8956b27..2d784b6 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -131,7 +131,8 @@ is_usable(Idx, Selector, SortFields) ->
         [<<"_id">>, <<"_rev">>]),
 
     mango_selector:has_required_fields(Selector, RequiredFields2)
-        andalso not is_text_search(Selector).
+        andalso not is_text_search(Selector)
+        andalso can_use_sort(RequiredFields, SortFields, Selector).
 
 
 is_text_search({[]}) ->
@@ -511,3 +512,30 @@ range_pos(Low, Arg, High) ->
                     max
             end
     end.
+
+
+% Can_use_sort works as follows:
+%
+% * no sort fields then we can use this
+% * Run out index columns we can't use this index
+% * If the current column is the start of the sort, return if sort is a prefix
+% * If the current column is constant, drop it and continue, else return false
+%
+% A constant column is a something that won't affect the sort
+% for example A: {$eq: 21}}
+%
+% Currently we only look at constant fields that are prefixes to the sort fields
+% set by the user. We considered adding in constant fields after sort fields
+% but were not 100% sure that it would not affect the sorting of the query.
+
+can_use_sort(_Cols, [], _Selector) ->
+    true;
+can_use_sort([], _SortFields, _Selector) ->
+    false;
+can_use_sort([Col | _] = Cols, [Col | _] = SortFields, _Selector) ->
+    lists:prefix(SortFields, Cols);
+can_use_sort([Col | RestCols], SortFields, Selector) ->
+    case mango_selector:is_constant_field(Selector, Col) of
+        true -> can_use_sort(RestCols, SortFields, Selector);
+        false -> false
+    end.
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 968dc3c..fffadcd 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -16,7 +16,8 @@
 -export([
     normalize/1,
     match/2,
-    has_required_fields/2
+    has_required_fields/2,
+    is_constant_field/2
 ]).
 
 
@@ -638,11 +639,121 @@ has_required_fields_int([{[{Field, Cond}]} | Rest], RequiredFields) ->
     end.
 
 
+% Returns true if a field in the selector is a constant value e.g. {a: {$eq: 1}}
+is_constant_field({[]}, _Field) ->
+    false;
+
+is_constant_field(Selector, Field) when not is_list(Selector) ->
+    is_constant_field([Selector], Field);
+
+is_constant_field([], _Field) ->
+    false;
+
+is_constant_field([{[{<<"$and">>, Args}]}], Field) when is_list(Args) ->
+    lists:any(fun(Arg) -> is_constant_field(Arg, Field) end, Args);
+
+is_constant_field([{[{<<"$and">>, Args}]}], Field) ->
+    is_constant_field(Args, Field);
+
+is_constant_field([{[{Field, {[{Cond, _Val}]}}]} | _Rest], Field) ->
+    Cond =:= <<"$eq">>;
+
+is_constant_field([{[{_UnMatched, _}]} | Rest], Field) ->
+    is_constant_field(Rest, Field).
+
+
 %%%%%%%% module tests below %%%%%%%%
 
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 
+is_constant_field_basic_test() ->
+    Selector = normalize({[{<<"A">>, <<"foo">>}]}),
+    Field = <<"A">>,
+    ?assertEqual(true, is_constant_field(Selector, Field)).
+
+is_constant_field_basic_two_test() ->
+    Selector = normalize({[{<<"$and">>,
+        [
+            {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]},
+            {[{<<"age">>,{[{<<"$gt">>,10}]}}]}
+        ]
+    }]}),
+    Field = <<"cars">>,
+    ?assertEqual(true, is_constant_field(Selector, Field)).
+
+is_constant_field_not_eq_test() ->
+    Selector = normalize({[{<<"$and">>,
+        [
+            {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]},
+            {[{<<"age">>,{[{<<"$gt">>,10}]}}]}
+        ]
+    }]}),
+    Field = <<"age">>,
+    ?assertEqual(false, is_constant_field(Selector, Field)).
+
+is_constant_field_missing_field_test() ->
+    Selector = normalize({[{<<"$and">>,
+        [
+            {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]},
+            {[{<<"age">>,{[{<<"$gt">>,10}]}}]}
+        ]
+    }]}),
+    Field = <<"wrong">>,
+    ?assertEqual(false, is_constant_field(Selector, Field)).
+
+is_constant_field_or_field_test() ->
+    Selector = {[{<<"$or">>,
+          [
+              {[{<<"A">>, <<"foo">>}]},
+              {[{<<"B">>, <<"foo">>}]}
+          ]
+    }]},
+    Normalized = normalize(Selector),
+    Field = <<"A">>,
+    ?assertEqual(false, is_constant_field(Normalized, Field)).
+
+is_constant_field_empty_selector_test() ->
+    Selector = normalize({[]}),
+    Field = <<"wrong">>,
+    ?assertEqual(false, is_constant_field(Selector, Field)).
+
+is_constant_nested_and_test() ->
+    Selector1 = {[{<<"$and">>,
+          [
+              {[{<<"A">>, <<"foo">>}]}
+          ]
+    }]},
+    Selector2 = {[{<<"$and">>,
+          [
+              {[{<<"B">>, {[{<<"$gt">>,10}]}}]}
+          ]
+    }]},
+    Selector = {[{<<"$and">>,
+          [
+              Selector1,
+              Selector2
+          ]
+    }]},
+
+    Normalized = normalize(Selector),
+    ?assertEqual(true, is_constant_field(Normalized, <<"A">>)),
+    ?assertEqual(false, is_constant_field(Normalized, <<"B">>)).
+
+is_constant_combined_or_and_equals_test() ->
+    Selector = {[{<<"A">>, "foo"},
+          {<<"$or">>,
+              [
+                  {[{<<"B">>, <<"bar">>}]},
+                  {[{<<"B">>, <<"baz">>}]}
+              ]
+          },
+		  {<<"C">>, "qux"}
+	]},
+    Normalized = normalize(Selector),
+    ?assertEqual(true, is_constant_field(Normalized, <<"C">>)),
+    ?assertEqual(false, is_constant_field(Normalized, <<"B">>)).
+
 has_required_fields_basic_test() ->
     RequiredFields = [<<"A">>],
     Selector = {[{<<"A">>, <<"foo">>}]},
diff --git a/src/mango/test/02-basic-find-test.py b/src/mango/test/02-basic-find-test.py
index f7e151a..6a31d33 100644
--- a/src/mango/test/02-basic-find-test.py
+++ b/src/mango/test/02-basic-find-test.py
@@ -333,3 +333,10 @@ class BasicFindTests(mango.UserDocsTests):
         assert explain["mrargs"]["start_key"] == [0]
         assert explain["mrargs"]["end_key"] == ["<MAX>"]
         assert explain["mrargs"]["include_docs"] == True
+
+    def test_sort_with_all_docs(self):
+        explain = self.db.find({
+            "_id": {"$gt": 0},
+            "age": {"$gt": 0}
+        }, sort=["_id"], explain=True)
+        self.assertEquals(explain["index"]["type"], "special")
diff --git a/src/mango/test/18-json-sort.py b/src/mango/test/18-json-sort.py
new file mode 100644
index 0000000..f8d2abe
--- /dev/null
+++ b/src/mango/test/18-json-sort.py
@@ -0,0 +1,222 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import mango
+import copy
+import unittest
+
+DOCS = [
+    {
+        "_id": "1",
+        "name": "Jimi",
+        "age": 10,
+        "cars": 1
+    },
+    {
+        "_id": "2",
+        "name": "Eddie",
+        "age": 20,
+        "cars": 1
+    },
+    {
+        "_id": "3",
+        "name": "Jane",
+        "age": 30,
+        "cars": 2
+    },
+    {
+        "_id": "4",
+        "name": "Mary",
+        "age": 40,
+        "cars": 2
+    },
+    {
+        "_id": "5",
+        "name": "Sam",
+        "age": 50,
+        "cars": 3
+    }
+]
+
+class JSONIndexSortOptimisations(mango.DbPerClass):
+    def setUp(self):
+        self.db.recreate()
+        self.db.save_docs(copy.deepcopy(DOCS))
+
+    def test_works_for_basic_case(self):
+        self.db.create_index(["cars", "age"], name="cars-age")
+        selector = {
+            "cars": "2",
+            "age": {
+                "$gt": 10
+            }
+        }
+        explain = self.db.find(selector, sort=["age"], explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age")
+        self.assertEqual(explain["mrargs"]["direction"], "fwd")
+
+    def test_works_for_all_fields_specified(self):
+        self.db.create_index(["cars", "age"], name="cars-age")
+        selector = {
+            "cars": "2",
+            "age": {
+                "$gt": 10
+            }
+        }
+        explain = self.db.find(selector, sort=["cars", "age"], explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age")
+
+    def test_works_for_no_sort_fields_specified(self):
+        self.db.create_index(["cars", "age"], name="cars-age")
+        selector = {
+            "cars": {
+                "$gt": 10
+            },
+            "age": {
+                "$gt": 10
+            }
+        }
+        explain = self.db.find(selector, explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age")
+
+    def test_works_for_opp_dir_sort(self):
+        self.db.create_index(["cars", "age"], name="cars-age")
+        selector = {
+            "cars": "2",
+            "age": {
+                "$gt": 10
+            }
+        }
+        explain = self.db.find(selector, sort=[{"age": "desc"}], explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age")
+        self.assertEqual(explain["mrargs"]["direction"], "rev")
+    
+    def test_not_work_for_non_constant_field(self):
+        self.db.create_index(["cars", "age"], name="cars-age")
+        selector = {
+            "cars": {
+                "$gt": 10
+            },
+            "age": {
+                "$gt": 10
+            }
+        }
+        try:
+            self.db.find(selector, explain=True, sort=["age"])
+            raise Exception("Should not get here")
+        except Exception as e:
+            resp = e.response.json()
+            self.assertEqual(resp["error"], "no_usable_index")
+
+    def test_three_index_one(self):
+        self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+        selector = {
+            "cars": "2",
+            "age": 10,
+            "name": {
+                "$gt": "AA"
+            }
+        }
+        explain = self.db.find(selector, sort=["name"], explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+    def test_three_index_two(self):
+        self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+        selector = {
+            "cars": "2",
+            "name": "Eddie",
+            "age": {
+                "$gt": 10
+            }
+        }
+        explain = self.db.find(selector, sort=["age"], explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+    def test_three_index_fails(self):
+        self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+        selector = {
+            "name": "Eddie",
+            "age": {
+                "$gt": 1
+            },
+            "cars": {
+                "$gt": "1"
+            }
+        }
+        try:
+            self.db.find(selector, explain=True, sort=["name"])
+            raise Exception("Should not get here")
+        except Exception as e:
+            resp = e.response.json()
+            self.assertEqual(resp["error"], "no_usable_index")
+
+    def test_empty_sort(self):
+        self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+        selector = {
+            "name": {
+                "$gt": "Eddie",
+            },
+            "age": 10,
+            "cars": {
+                "$gt": "1"
+            }
+        }
+        explain = self.db.find(selector, explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+    def test_in_between(self):
+        self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+        selector = {
+            "name": "Eddie",
+            "age": 10,
+            "cars": {
+                "$gt": "1"
+            }
+        }
+        explain = self.db.find(selector, explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+        try:
+            self.db.find(selector, sort=["cars", "name"], explain=True)
+            raise Exception("Should not get here")
+        except Exception as e:
+            resp = e.response.json()
+            self.assertEqual(resp["error"], "no_usable_index")
+    
+    def test_ignore_after_set_sort_value(self):
+        self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+        selector = {
+            "age": {
+                "$gt": 10
+            },
+            "cars": 2,
+            "name": {
+                "$gt": "A"
+            }
+        }
+        explain = self.db.find(selector, sort=["age"], explain=True)
+        self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+    def test_not_use_index_if_other_fields_in_sort(self):
+        self.db.create_index(["cars", "age"], name="cars-age")
+        selector = {
+            "age": 10,
+            "cars": {
+                "$gt": "1"
+            }
+        }
+        try:
+            self.db.find(selector, sort=["cars", "name"], explain=True)
+            raise Exception("Should not get here")
+        except Exception as e:
+            resp = e.response.json()
+            self.assertEqual(resp["error"], "no_usable_index")