You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ro...@apache.org on 2015/02/03 16:13:43 UTC

[36/50] [abbrv] couchdb-mango git commit: Move view specific logic to mango_view_idx

Move view specific logic to mango_view_idx

A lot of the logic in mango_selector is view specific so this moves it
to mango_idx_view to make that more apparent.

BugzId: 33294


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mango/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mango/commit/7a9de344
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mango/tree/7a9de344
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mango/diff/7a9de344

Branch: refs/heads/master
Commit: 7a9de3447652d31bb517a6975b6cb5c622854681
Parents: 08c3320
Author: Paul J. Davis <pa...@gmail.com>
Authored: Fri Jan 9 12:03:29 2015 -0600
Committer: Paul J. Davis <pa...@gmail.com>
Committed: Fri Jan 16 13:32:49 2015 -0600

----------------------------------------------------------------------
 src/mango_cursor.erl   |  19 +---
 src/mango_idx_view.erl | 272 +++++++++++++++++++++++++++++++++++++++++++-
 src/mango_selector.erl | 245 ---------------------------------------
 3 files changed, 273 insertions(+), 263 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_cursor.erl
----------------------------------------------------------------------
diff --git a/src/mango_cursor.erl b/src/mango_cursor.erl
index 22970c7..8f780b7 100644
--- a/src/mango_cursor.erl
+++ b/src/mango_cursor.erl
@@ -29,8 +29,8 @@
 
 create(Db, Selector0, Opts) ->
     Selector = mango_selector:normalize(Selector0),
-    IndexFields = mango_selector:index_fields(Selector),    
-    FieldRanges = find_field_ranges(Selector, IndexFields),
+    IndexFields = mango_idx_view:indexable_fields(Selector),    
+    FieldRanges = mango_idx_view:field_ranges(Selector, IndexFields),
 
     if IndexFields /= [] -> ok; true ->
         ?MANGO_ERROR({no_usable_index, operator_unsupported})
@@ -123,21 +123,6 @@ limit_to_sort(ExistingIndexes, UsableIndexes, Sort) ->
     FinalIndexes.
 
 
-% For each field, return {Field, Range}
-find_field_ranges(Selector, Fields) ->
-    find_field_ranges(Selector, Fields, []).
-
-find_field_ranges(_Selector, [], Acc) ->
-    lists:reverse(Acc);
-find_field_ranges(Selector, [Field | Rest], Acc) ->
-    case mango_selector:range(Selector, Field) of
-        empty ->
-            [{Field, empty}];
-        Range ->
-            find_field_ranges(Selector, Rest, [{Field, Range} | Acc])
-    end.
-
-
 % Any of these indexes may be a composite index. For each
 % index find the most specific set of fields for each
 % index. Ie, if an index has columns a, b, c, d, then

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_idx_view.erl
----------------------------------------------------------------------
diff --git a/src/mango_idx_view.erl b/src/mango_idx_view.erl
index 993574f..eaa4341 100644
--- a/src/mango_idx_view.erl
+++ b/src/mango_idx_view.erl
@@ -21,7 +21,11 @@
     to_json/1,
     columns/1,
     start_key/1,
-    end_key/1
+    end_key/1,
+
+    indexable_fields/1,
+    field_ranges/1,
+    field_ranges/2
 ]).
 
 
@@ -171,3 +175,269 @@ make_view(Idx) ->
         {<<"options">>, {Idx#idx.opts}}
     ]},
     {Idx#idx.name, View}.
+
+
+% This function returns a list of indexes that
+% can be used to restrict this query. This works by
+% searching the selector looking for field names that
+% can be "seen".
+%
+% Operators that can be seen through are '$and' and any of
+% the logical comparisons ('$lt', '$eq', etc). Things like
+% '$regex', '$in', '$nin', and '$or' can't be serviced by
+% a single index scan so we disallow them. In the future
+% we may become more clever and increase our ken such that
+% we will be able to see through these with crafty indexes
+% or new uses for existing indexes. For instance, I could
+% see an '$or' between comparisons on the same field becoming
+% the equivalent of a multi-query. But that's for another
+% day.
+
+% We can see through '$and' trivially
+indexable_fields({[{<<"$and">>, Args}]}) ->
+    lists:usort(lists:flatten([indexable_fields(A) || A <- Args]));
+
+% So far we can't see through any other operator
+indexable_fields({[{<<"$", _/binary>>, _}]}) ->
+    [];
+
+% If we have a field with a terminator that is locatable
+% using an index then the field is a possible index
+indexable_fields({[{Field, Cond}]}) ->
+    case indexable(Cond) of
+        true ->
+            [Field];
+        false ->
+            []
+    end;
+
+% An empty selector
+indexable_fields({[]}) ->
+    [].
+
+
+% Check if a condition is indexable. The logical
+% comparisons are mostly straight forward. We
+% currently don't understand '$in' which is
+% theoretically supportable. '$nin' and '$ne'
+% aren't currently supported because they require
+% multiple index scans.
+indexable({[{<<"$lt">>, _}]}) ->
+    true;
+indexable({[{<<"$lte">>, _}]}) ->
+    true;
+indexable({[{<<"$eq">>, _}]}) ->
+    true;
+indexable({[{<<"$gt">>, _}]}) ->
+    true;
+indexable({[{<<"$gte">>, _}]}) ->
+    true;
+
+% All other operators are currently not indexable.
+% This is also a subtle assertion that we don't
+% call indexable/1 on a field name.
+indexable({[{<<"$", _/binary>>, _}]}) ->
+    false.
+
+
+% For each field, return {Field, Range}
+field_ranges(Selector) ->
+    Fields = indexable_fields(Selector),
+    field_ranges(Selector, Fields).
+
+
+field_ranges(Selector, Fields) ->
+    field_ranges(Selector, Fields, []).
+
+
+field_ranges(_Selector, [], Acc) ->
+    lists:reverse(Acc);
+field_ranges(Selector, [Field | Rest], Acc) ->
+    case range(Selector, Field) of
+        empty ->
+            [{Field, empty}];
+        Range ->
+            field_ranges(Selector, Rest, [{Field, Range} | Acc])
+    end.
+
+
+% Find the complete range for a given index in this
+% selector. This works by AND'ing logical comparisons
+% together so that we can define the start and end
+% keys for a given index.
+%
+% Selector must have been normalized before calling
+% this function.
+range(Selector, Index) ->
+    range(Selector, Index, '$gt', mango_json:min(), '$lt', mango_json:max()).
+
+
+% Adjust Low and High based on values found for the
+% givend Index in Selector.
+range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) ->
+    lists:foldl(fun
+        (Arg, {LC, L, HC, H}) ->
+            range(Arg, Index, LC, L, HC, H);
+        (_Arg, empty) ->
+            empty
+    end, {LCmp, Low, HCmp, High}, Args);
+
+% We can currently only traverse '$and' operators
+range({[{<<"$", _/binary>>}]}, _Index, LCmp, Low, HCmp, High) ->
+    {LCmp, Low, HCmp, High};
+
+% If the field name matches the index see if we can narrow
+% the acceptable range.
+range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) ->
+    range(Cond, LCmp, Low, HCmp, High);
+
+% Else we have a field unrelated to this index so just
+% return the current values.
+range(_, _, LCmp, Low, HCmp, High) ->
+    {LCmp, Low, HCmp, High}.
+
+
+% The comments below are a bit cryptic at first but they show
+% where the Arg cand land in the current range.
+%
+% For instance, given:
+%
+%     {$lt: N}
+%     Low = 1
+%     High = 5
+%
+% Depending on the value of N we can have one of five locations
+% in regards to a given Low/High pair:
+%
+%     min low mid high max
+%
+%   That is:
+%       min = (N < Low)
+%       low = (N == Low)
+%       mid = (Low < N < High)
+%       high = (N == High)
+%       max = (High < N)
+%
+% If N < 1, (min) then the effective range is empty.
+%
+% If N == 1, (low) then we have to set the range to empty because
+% N < 1 && N >= 1 is an empty set. If the operator had been '$lte'
+% and LCmp was '$gte' or '$eq' then we could keep around the equality
+% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg.
+%
+% If 1 < N < 5 (mid), then we set High to Arg and Arg has just
+% narrowed our range. HCmp is set the the '$lt' operator that was
+% part of the input.
+%
+% If N == 5 (high), We just set HCmp to '$lt' since its guaranteed
+% to be equally or more restrictive than the current possible values
+% of '$lt' or '$lte'.
+%
+% If N > 5 (max), nothing changes as our current range is already
+% more narrow than the current condition.
+%
+% Obviously all of that logic gets tweaked for the other logical
+% operators but its all straight forward once you figure out how
+% we're basically just narrowing our logical ranges.
+
+range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    case range_pos(Low, Arg, High) of
+        min ->
+            empty;
+        low ->
+            empty;
+        mid ->
+            {LCmp, Low, '$lt', Arg};
+        high ->
+            {LCmp, Low, '$lt', Arg};
+        max ->
+            {LCmp, Low, HCmp, High}
+    end;
+
+range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    case range_pos(Low, Arg, High) of
+        min ->
+            empty;
+        low when LCmp == '$gte'; LCmp == '$eq' ->
+            {'$eq', Arg, '$eq', Arg};
+        low ->
+            empty;
+        mid ->
+            {LCmp, Low, '$lte', Arg};
+        high ->
+            {LCmp, Low, HCmp, High};
+        max ->
+            {LCmp, Low, HCmp, High}
+    end;
+
+range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    case range_pos(Low, Arg, High) of
+        min ->
+            empty;
+        low when LCmp == '$gte'; LCmp == '$eq' ->
+            {'$eq', Arg, '$eq', Arg};
+        low ->
+            empty;
+        mid ->
+            {'$eq', Arg, '$eq', Arg};
+        high when HCmp == '$lte'; HCmp == '$eq' ->
+            {'$eq', Arg, '$eq', Arg};
+        high ->
+            empty;
+        max ->
+            empty
+    end;
+
+range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    case range_pos(Low, Arg, High) of
+        min ->
+            {LCmp, Low, HCmp, High};
+        low ->
+            {LCmp, Low, HCmp, High};
+        mid ->
+            {'$gte', Arg, HCmp, High};
+        high when HCmp == '$lte'; HCmp == '$eq' ->
+            {'$eq', Arg, '$eq', Arg};
+        high ->
+            empty;
+        max ->
+            empty
+    end;
+
+range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) ->
+    case range_pos(Low, Arg, High) of
+        min ->
+            {LCmp, Low, HCmp, High};
+        low ->
+            {'$gt', Arg, HCmp, High};
+        mid ->
+            {'$gt', Arg, HCmp, High};
+        high ->
+            empty;
+        max ->
+            empty
+    end;
+
+% There's some other un-indexable restriction on the index
+% that will be applied as a post-filter. Ignore it and
+% carry on our merry way.
+range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) ->
+    {LCmp, Low, HCmp, High}.
+
+
+% Returns the value min | low | mid | high | max depending
+% on how Arg compares to Low and High.
+range_pos(Low, Arg, High) ->
+    case mango_json:cmp(Arg, Low) of
+        N when N < 0 -> min;
+        N when N == 0 -> low;
+        _ ->
+            case mango_json:cmp(Arg, High) of
+                X when X < 0 ->
+                    mid;
+                X when X == 0 ->
+                    high;
+                _ ->
+                    max
+            end
+    end.

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_selector.erl
----------------------------------------------------------------------
diff --git a/src/mango_selector.erl b/src/mango_selector.erl
index d1c9898..dd16bf5 100644
--- a/src/mango_selector.erl
+++ b/src/mango_selector.erl
@@ -15,8 +15,6 @@
 
 -export([
     normalize/1,
-    index_fields/1,
-    range/2,
     match/2
 ]).
 
@@ -48,54 +46,6 @@ normalize(Selector) ->
     end,
     {NProps}.
 
-% This function returns a list of indexes that
-% can be used to restrict this query. This works by
-% searching the selector looking for field names that
-% can be "seen".
-%
-% Operators that can be seen through are '$and' and any of
-% the logical comparisons ('$lt', '$eq', etc). Things like
-% '$regex', '$in', '$nin', and '$or' can't be serviced by
-% a single index scan so we disallow them. In the future
-% we may become more clever and increase our ken such that
-% we will be able to see through these with crafty indexes
-% or new uses for existing indexes. For instance, I could
-% see an '$or' between comparisons on the same field becoming
-% the equivalent of a multi-query. But that's for another
-% day.
-
-% We can see through '$and' trivially
-index_fields({[{<<"$and">>, Args}]}) ->
-    lists:usort(lists:flatten([index_fields(A) || A <- Args]));
-
-% So far we can't see through any other operator
-index_fields({[{<<"$", _/binary>>, _}]}) ->
-    [];
-
-% If we have a field with a terminator that is locatable
-% using an index then the field is a possible index
-index_fields({[{Field, Cond}]}) ->
-    case indexable(Cond) of
-        true ->
-            [Field];
-        false ->
-            []
-    end;
-
-% An empty selector
-index_fields({[]}) ->
-    [].
-
-% Find the complete range for a given index in this
-% selector. This works by AND'ing logical comparisons
-% together so that we can define the start and end
-% keys for a given index.
-%
-% Selector must have been normalized before calling
-% this function.
-range(Selector, Index) ->
-    range(Selector, Index, '$gt', mango_json:min(), '$lt', mango_json:max()).
-
 
 % Match a selector against a #doc{} or EJSON value.
 % This assumes that the Selector has been normalized.
@@ -407,201 +357,6 @@ negate({[{Field, Cond}]}) ->
     {[{Field, negate(Cond)}]}.
 
 
-% Check if a condition is indexable. The logical
-% comparisons are mostly straight forward. We
-% currently don't understand '$in' which is
-% theoretically supportable. '$nin' and '$ne'
-% aren't currently supported because they require
-% multiple index scans.
-indexable({[{<<"$lt">>, _}]}) ->
-    true;
-indexable({[{<<"$lte">>, _}]}) ->
-    true;
-indexable({[{<<"$eq">>, _}]}) ->
-    true;
-indexable({[{<<"$gt">>, _}]}) ->
-    true;
-indexable({[{<<"$gte">>, _}]}) ->
-    true;
-
-% All other operators are currently not indexable.
-% This is also a subtle assertion that we don't
-% call indexable/1 on a field name.
-indexable({[{<<"$", _/binary>>, _}]}) ->
-    false.
-
-
-% Adjust Low and High based on values found for the
-% givend Index in Selector.
-range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) ->
-    lists:foldl(fun
-        (Arg, {LC, L, HC, H}) ->
-            range(Arg, Index, LC, L, HC, H);
-        (_Arg, empty) ->
-            empty
-    end, {LCmp, Low, HCmp, High}, Args);
-
-% We can currently only traverse '$and' operators
-range({[{<<"$", _/binary>>}]}, _Index, LCmp, Low, HCmp, High) ->
-    {LCmp, Low, HCmp, High};
-
-% If the field name matches the index see if we can narrow
-% the acceptable range.
-range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) ->
-    range(Cond, LCmp, Low, HCmp, High);
-
-% Else we have a field unrelated to this index so just
-% return the current values.
-range(_, _, LCmp, Low, HCmp, High) ->
-    {LCmp, Low, HCmp, High}.
-
-
-% The comments below are a bit cryptic at first but they show
-% where the Arg cand land in the current range.
-%
-% For instance, given:
-%
-%     {$lt: N}
-%     Low = 1
-%     High = 5
-%
-% Depending on the value of N we can have one of five locations
-% in regards to a given Low/High pair:
-%
-%     min low mid high max
-%
-%   That is:
-%       min = (N < Low)
-%       low = (N == Low)
-%       mid = (Low < N < High)
-%       high = (N == High)
-%       max = (High < N)
-%
-% If N < 1, (min) then the effective range is empty.
-%
-% If N == 1, (low) then we have to set the range to empty because
-% N < 1 && N >= 1 is an empty set. If the operator had been '$lte'
-% and LCmp was '$gte' or '$eq' then we could keep around the equality
-% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg.
-%
-% If 1 < N < 5 (mid), then we set High to Arg and Arg has just
-% narrowed our range. HCmp is set the the '$lt' operator that was
-% part of the input.
-%
-% If N == 5 (high), We just set HCmp to '$lt' since its guaranteed
-% to be equally or more restrictive than the current possible values
-% of '$lt' or '$lte'.
-%
-% If N > 5 (max), nothing changes as our current range is already
-% more narrow than the current condition.
-%
-% Obviously all of that logic gets tweaked for the other logical
-% operators but its all straight forward once you figure out how
-% we're basically just narrowing our logical ranges.
-
-range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
-    case range_pos(Low, Arg, High) of
-        min ->
-            empty;
-        low ->
-            empty;
-        mid ->
-            {LCmp, Low, '$lt', Arg};
-        high ->
-            {LCmp, Low, '$lt', Arg};
-        max ->
-            {LCmp, Low, HCmp, High}
-    end;
-
-range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) ->
-    case range_pos(Low, Arg, High) of
-        min ->
-            empty;
-        low when LCmp == '$gte'; LCmp == '$eq' ->
-            {'$eq', Arg, '$eq', Arg};
-        low ->
-            empty;
-        mid ->
-            {LCmp, Low, '$lte', Arg};
-        high ->
-            {LCmp, Low, HCmp, High};
-        max ->
-            {LCmp, Low, HCmp, High}
-    end;
-
-range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) ->
-    case range_pos(Low, Arg, High) of
-        min ->
-            empty;
-        low when LCmp == '$gte'; LCmp == '$eq' ->
-            {'$eq', Arg, '$eq', Arg};
-        low ->
-            empty;
-        mid ->
-            {'$eq', Arg, '$eq', Arg};
-        high when HCmp == '$lte'; HCmp == '$eq' ->
-            {'$eq', Arg, '$eq', Arg};
-        high ->
-            empty;
-        max ->
-            empty
-    end;
-
-range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) ->
-    case range_pos(Low, Arg, High) of
-        min ->
-            {LCmp, Low, HCmp, High};
-        low ->
-            {LCmp, Low, HCmp, High};
-        mid ->
-            {'$gte', Arg, HCmp, High};
-        high when HCmp == '$lte'; HCmp == '$eq' ->
-            {'$eq', Arg, '$eq', Arg};
-        high ->
-            empty;
-        max ->
-            empty
-    end;
-
-range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) ->
-    case range_pos(Low, Arg, High) of
-        min ->
-            {LCmp, Low, HCmp, High};
-        low ->
-            {'$gt', Arg, HCmp, High};
-        mid ->
-            {'$gt', Arg, HCmp, High};
-        high ->
-            empty;
-        max ->
-            empty
-    end;
-
-% There's some other un-indexable restriction on the index
-% that will be applied as a post-filter. Ignore it and
-% carry on our merry way.
-range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) ->
-    {LCmp, Low, HCmp, High}.
-
-
-% Returns the value min | low | mid | high | max depending
-% on how Arg compares to Low and High.
-range_pos(Low, Arg, High) ->
-    case mango_json:cmp(Arg, Low) of
-        N when N < 0 -> min;
-        N when N == 0 -> low;
-        _ ->
-            case mango_json:cmp(Arg, High) of
-                X when X < 0 ->
-                    mid;
-                X when X == 0 ->
-                    high;
-                _ ->
-                    max
-            end
-    end.
-
-
 match({[{<<"$and">>, Args}]}, Value, Cmp) ->
     Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end,
     lists:all(Pred, Args);