You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ro...@apache.org on 2015/02/03 16:13:43 UTC
[36/50] [abbrv] couchdb-mango git commit: Move view specific logic to
mango_view_idx
Move view specific logic to mango_view_idx
A lot of the logic in mango_selector is view specific so this moves it
to mango_idx_view to make that more apparent.
BugzId: 33294
Project: http://git-wip-us.apache.org/repos/asf/couchdb-mango/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mango/commit/7a9de344
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mango/tree/7a9de344
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mango/diff/7a9de344
Branch: refs/heads/master
Commit: 7a9de3447652d31bb517a6975b6cb5c622854681
Parents: 08c3320
Author: Paul J. Davis <pa...@gmail.com>
Authored: Fri Jan 9 12:03:29 2015 -0600
Committer: Paul J. Davis <pa...@gmail.com>
Committed: Fri Jan 16 13:32:49 2015 -0600
----------------------------------------------------------------------
src/mango_cursor.erl | 19 +---
src/mango_idx_view.erl | 272 +++++++++++++++++++++++++++++++++++++++++++-
src/mango_selector.erl | 245 ---------------------------------------
3 files changed, 273 insertions(+), 263 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_cursor.erl
----------------------------------------------------------------------
diff --git a/src/mango_cursor.erl b/src/mango_cursor.erl
index 22970c7..8f780b7 100644
--- a/src/mango_cursor.erl
+++ b/src/mango_cursor.erl
@@ -29,8 +29,8 @@
create(Db, Selector0, Opts) ->
Selector = mango_selector:normalize(Selector0),
- IndexFields = mango_selector:index_fields(Selector),
- FieldRanges = find_field_ranges(Selector, IndexFields),
+ IndexFields = mango_idx_view:indexable_fields(Selector),
+ FieldRanges = mango_idx_view:field_ranges(Selector, IndexFields),
if IndexFields /= [] -> ok; true ->
?MANGO_ERROR({no_usable_index, operator_unsupported})
@@ -123,21 +123,6 @@ limit_to_sort(ExistingIndexes, UsableIndexes, Sort) ->
FinalIndexes.
-% For each field, return {Field, Range}
-find_field_ranges(Selector, Fields) ->
- find_field_ranges(Selector, Fields, []).
-
-find_field_ranges(_Selector, [], Acc) ->
- lists:reverse(Acc);
-find_field_ranges(Selector, [Field | Rest], Acc) ->
- case mango_selector:range(Selector, Field) of
- empty ->
- [{Field, empty}];
- Range ->
- find_field_ranges(Selector, Rest, [{Field, Range} | Acc])
- end.
-
-
% Any of these indexes may be a composite index. For each
% index find the most specific set of fields for each
% index. Ie, if an index has columns a, b, c, d, then
http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_idx_view.erl
----------------------------------------------------------------------
diff --git a/src/mango_idx_view.erl b/src/mango_idx_view.erl
index 993574f..eaa4341 100644
--- a/src/mango_idx_view.erl
+++ b/src/mango_idx_view.erl
@@ -21,7 +21,11 @@
to_json/1,
columns/1,
start_key/1,
- end_key/1
+ end_key/1,
+
+ indexable_fields/1,
+ field_ranges/1,
+ field_ranges/2
]).
@@ -171,3 +175,269 @@ make_view(Idx) ->
{<<"options">>, {Idx#idx.opts}}
]},
{Idx#idx.name, View}.
+
+
+% This function returns a list of indexes that
+% can be used to restrict this query. This works by
+% searching the selector looking for field names that
+% can be "seen".
+%
+% Operators that can be seen through are '$and' and any of
+% the logical comparisons ('$lt', '$eq', etc). Things like
+% '$regex', '$in', '$nin', and '$or' can't be serviced by
+% a single index scan so we disallow them. In the future
+% we may become more clever and increase our ken such that
+% we will be able to see through these with crafty indexes
+% or new uses for existing indexes. For instance, I could
+% see an '$or' between comparisons on the same field becoming
+% the equivalent of a multi-query. But that's for another
+% day.
+
+% We can see through '$and' trivially
+indexable_fields({[{<<"$and">>, Args}]}) ->
+ lists:usort(lists:flatten([indexable_fields(A) || A <- Args]));
+
+% So far we can't see through any other operator
+indexable_fields({[{<<"$", _/binary>>, _}]}) ->
+ [];
+
+% If we have a field with a terminator that is locatable
+% using an index then the field is a possible index
+indexable_fields({[{Field, Cond}]}) ->
+ case indexable(Cond) of
+ true ->
+ [Field];
+ false ->
+ []
+ end;
+
+% An empty selector
+indexable_fields({[]}) ->
+ [].
+
+
+% Check if a condition is indexable. The logical
+% comparisons are mostly straight forward. We
+% currently don't understand '$in' which is
+% theoretically supportable. '$nin' and '$ne'
+% aren't currently supported because they require
+% multiple index scans.
+indexable({[{<<"$lt">>, _}]}) ->
+ true;
+indexable({[{<<"$lte">>, _}]}) ->
+ true;
+indexable({[{<<"$eq">>, _}]}) ->
+ true;
+indexable({[{<<"$gt">>, _}]}) ->
+ true;
+indexable({[{<<"$gte">>, _}]}) ->
+ true;
+
+% All other operators are currently not indexable.
+% This is also a subtle assertion that we don't
+% call indexable/1 on a field name.
+indexable({[{<<"$", _/binary>>, _}]}) ->
+ false.
+
+
+% For each field, return {Field, Range}
+field_ranges(Selector) ->
+ Fields = indexable_fields(Selector),
+ field_ranges(Selector, Fields).
+
+
+field_ranges(Selector, Fields) ->
+ field_ranges(Selector, Fields, []).
+
+
+field_ranges(_Selector, [], Acc) ->
+ lists:reverse(Acc);
+field_ranges(Selector, [Field | Rest], Acc) ->
+ case range(Selector, Field) of
+ empty ->
+ [{Field, empty}];
+ Range ->
+ field_ranges(Selector, Rest, [{Field, Range} | Acc])
+ end.
+
+
+% Find the complete range for a given index in this
+% selector. This works by AND'ing logical comparisons
+% together so that we can define the start and end
+% keys for a given index.
+%
+% Selector must have been normalized before calling
+% this function.
+range(Selector, Index) ->
+ range(Selector, Index, '$gt', mango_json:min(), '$lt', mango_json:max()).
+
+
+% Adjust Low and High based on values found for the
+% givend Index in Selector.
+range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) ->
+ lists:foldl(fun
+ (Arg, {LC, L, HC, H}) ->
+ range(Arg, Index, LC, L, HC, H);
+ (_Arg, empty) ->
+ empty
+ end, {LCmp, Low, HCmp, High}, Args);
+
+% We can currently only traverse '$and' operators
+range({[{<<"$", _/binary>>}]}, _Index, LCmp, Low, HCmp, High) ->
+ {LCmp, Low, HCmp, High};
+
+% If the field name matches the index see if we can narrow
+% the acceptable range.
+range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) ->
+ range(Cond, LCmp, Low, HCmp, High);
+
+% Else we have a field unrelated to this index so just
+% return the current values.
+range(_, _, LCmp, Low, HCmp, High) ->
+ {LCmp, Low, HCmp, High}.
+
+
+% The comments below are a bit cryptic at first but they show
+% where the Arg cand land in the current range.
+%
+% For instance, given:
+%
+% {$lt: N}
+% Low = 1
+% High = 5
+%
+% Depending on the value of N we can have one of five locations
+% in regards to a given Low/High pair:
+%
+% min low mid high max
+%
+% That is:
+% min = (N < Low)
+% low = (N == Low)
+% mid = (Low < N < High)
+% high = (N == High)
+% max = (High < N)
+%
+% If N < 1, (min) then the effective range is empty.
+%
+% If N == 1, (low) then we have to set the range to empty because
+% N < 1 && N >= 1 is an empty set. If the operator had been '$lte'
+% and LCmp was '$gte' or '$eq' then we could keep around the equality
+% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg.
+%
+% If 1 < N < 5 (mid), then we set High to Arg and Arg has just
+% narrowed our range. HCmp is set the the '$lt' operator that was
+% part of the input.
+%
+% If N == 5 (high), We just set HCmp to '$lt' since its guaranteed
+% to be equally or more restrictive than the current possible values
+% of '$lt' or '$lte'.
+%
+% If N > 5 (max), nothing changes as our current range is already
+% more narrow than the current condition.
+%
+% Obviously all of that logic gets tweaked for the other logical
+% operators but its all straight forward once you figure out how
+% we're basically just narrowing our logical ranges.
+
+range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
+ case range_pos(Low, Arg, High) of
+ min ->
+ empty;
+ low ->
+ empty;
+ mid ->
+ {LCmp, Low, '$lt', Arg};
+ high ->
+ {LCmp, Low, '$lt', Arg};
+ max ->
+ {LCmp, Low, HCmp, High}
+ end;
+
+range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) ->
+ case range_pos(Low, Arg, High) of
+ min ->
+ empty;
+ low when LCmp == '$gte'; LCmp == '$eq' ->
+ {'$eq', Arg, '$eq', Arg};
+ low ->
+ empty;
+ mid ->
+ {LCmp, Low, '$lte', Arg};
+ high ->
+ {LCmp, Low, HCmp, High};
+ max ->
+ {LCmp, Low, HCmp, High}
+ end;
+
+range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) ->
+ case range_pos(Low, Arg, High) of
+ min ->
+ empty;
+ low when LCmp == '$gte'; LCmp == '$eq' ->
+ {'$eq', Arg, '$eq', Arg};
+ low ->
+ empty;
+ mid ->
+ {'$eq', Arg, '$eq', Arg};
+ high when HCmp == '$lte'; HCmp == '$eq' ->
+ {'$eq', Arg, '$eq', Arg};
+ high ->
+ empty;
+ max ->
+ empty
+ end;
+
+range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) ->
+ case range_pos(Low, Arg, High) of
+ min ->
+ {LCmp, Low, HCmp, High};
+ low ->
+ {LCmp, Low, HCmp, High};
+ mid ->
+ {'$gte', Arg, HCmp, High};
+ high when HCmp == '$lte'; HCmp == '$eq' ->
+ {'$eq', Arg, '$eq', Arg};
+ high ->
+ empty;
+ max ->
+ empty
+ end;
+
+range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) ->
+ case range_pos(Low, Arg, High) of
+ min ->
+ {LCmp, Low, HCmp, High};
+ low ->
+ {'$gt', Arg, HCmp, High};
+ mid ->
+ {'$gt', Arg, HCmp, High};
+ high ->
+ empty;
+ max ->
+ empty
+ end;
+
+% There's some other un-indexable restriction on the index
+% that will be applied as a post-filter. Ignore it and
+% carry on our merry way.
+range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) ->
+ {LCmp, Low, HCmp, High}.
+
+
+% Returns the value min | low | mid | high | max depending
+% on how Arg compares to Low and High.
+range_pos(Low, Arg, High) ->
+ case mango_json:cmp(Arg, Low) of
+ N when N < 0 -> min;
+ N when N == 0 -> low;
+ _ ->
+ case mango_json:cmp(Arg, High) of
+ X when X < 0 ->
+ mid;
+ X when X == 0 ->
+ high;
+ _ ->
+ max
+ end
+ end.
http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_selector.erl
----------------------------------------------------------------------
diff --git a/src/mango_selector.erl b/src/mango_selector.erl
index d1c9898..dd16bf5 100644
--- a/src/mango_selector.erl
+++ b/src/mango_selector.erl
@@ -15,8 +15,6 @@
-export([
normalize/1,
- index_fields/1,
- range/2,
match/2
]).
@@ -48,54 +46,6 @@ normalize(Selector) ->
end,
{NProps}.
-% This function returns a list of indexes that
-% can be used to restrict this query. This works by
-% searching the selector looking for field names that
-% can be "seen".
-%
-% Operators that can be seen through are '$and' and any of
-% the logical comparisons ('$lt', '$eq', etc). Things like
-% '$regex', '$in', '$nin', and '$or' can't be serviced by
-% a single index scan so we disallow them. In the future
-% we may become more clever and increase our ken such that
-% we will be able to see through these with crafty indexes
-% or new uses for existing indexes. For instance, I could
-% see an '$or' between comparisons on the same field becoming
-% the equivalent of a multi-query. But that's for another
-% day.
-
-% We can see through '$and' trivially
-index_fields({[{<<"$and">>, Args}]}) ->
- lists:usort(lists:flatten([index_fields(A) || A <- Args]));
-
-% So far we can't see through any other operator
-index_fields({[{<<"$", _/binary>>, _}]}) ->
- [];
-
-% If we have a field with a terminator that is locatable
-% using an index then the field is a possible index
-index_fields({[{Field, Cond}]}) ->
- case indexable(Cond) of
- true ->
- [Field];
- false ->
- []
- end;
-
-% An empty selector
-index_fields({[]}) ->
- [].
-
-% Find the complete range for a given index in this
-% selector. This works by AND'ing logical comparisons
-% together so that we can define the start and end
-% keys for a given index.
-%
-% Selector must have been normalized before calling
-% this function.
-range(Selector, Index) ->
- range(Selector, Index, '$gt', mango_json:min(), '$lt', mango_json:max()).
-
% Match a selector against a #doc{} or EJSON value.
% This assumes that the Selector has been normalized.
@@ -407,201 +357,6 @@ negate({[{Field, Cond}]}) ->
{[{Field, negate(Cond)}]}.
-% Check if a condition is indexable. The logical
-% comparisons are mostly straight forward. We
-% currently don't understand '$in' which is
-% theoretically supportable. '$nin' and '$ne'
-% aren't currently supported because they require
-% multiple index scans.
-indexable({[{<<"$lt">>, _}]}) ->
- true;
-indexable({[{<<"$lte">>, _}]}) ->
- true;
-indexable({[{<<"$eq">>, _}]}) ->
- true;
-indexable({[{<<"$gt">>, _}]}) ->
- true;
-indexable({[{<<"$gte">>, _}]}) ->
- true;
-
-% All other operators are currently not indexable.
-% This is also a subtle assertion that we don't
-% call indexable/1 on a field name.
-indexable({[{<<"$", _/binary>>, _}]}) ->
- false.
-
-
-% Adjust Low and High based on values found for the
-% givend Index in Selector.
-range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) ->
- lists:foldl(fun
- (Arg, {LC, L, HC, H}) ->
- range(Arg, Index, LC, L, HC, H);
- (_Arg, empty) ->
- empty
- end, {LCmp, Low, HCmp, High}, Args);
-
-% We can currently only traverse '$and' operators
-range({[{<<"$", _/binary>>}]}, _Index, LCmp, Low, HCmp, High) ->
- {LCmp, Low, HCmp, High};
-
-% If the field name matches the index see if we can narrow
-% the acceptable range.
-range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) ->
- range(Cond, LCmp, Low, HCmp, High);
-
-% Else we have a field unrelated to this index so just
-% return the current values.
-range(_, _, LCmp, Low, HCmp, High) ->
- {LCmp, Low, HCmp, High}.
-
-
-% The comments below are a bit cryptic at first but they show
-% where the Arg cand land in the current range.
-%
-% For instance, given:
-%
-% {$lt: N}
-% Low = 1
-% High = 5
-%
-% Depending on the value of N we can have one of five locations
-% in regards to a given Low/High pair:
-%
-% min low mid high max
-%
-% That is:
-% min = (N < Low)
-% low = (N == Low)
-% mid = (Low < N < High)
-% high = (N == High)
-% max = (High < N)
-%
-% If N < 1, (min) then the effective range is empty.
-%
-% If N == 1, (low) then we have to set the range to empty because
-% N < 1 && N >= 1 is an empty set. If the operator had been '$lte'
-% and LCmp was '$gte' or '$eq' then we could keep around the equality
-% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg.
-%
-% If 1 < N < 5 (mid), then we set High to Arg and Arg has just
-% narrowed our range. HCmp is set the the '$lt' operator that was
-% part of the input.
-%
-% If N == 5 (high), We just set HCmp to '$lt' since its guaranteed
-% to be equally or more restrictive than the current possible values
-% of '$lt' or '$lte'.
-%
-% If N > 5 (max), nothing changes as our current range is already
-% more narrow than the current condition.
-%
-% Obviously all of that logic gets tweaked for the other logical
-% operators but its all straight forward once you figure out how
-% we're basically just narrowing our logical ranges.
-
-range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
- case range_pos(Low, Arg, High) of
- min ->
- empty;
- low ->
- empty;
- mid ->
- {LCmp, Low, '$lt', Arg};
- high ->
- {LCmp, Low, '$lt', Arg};
- max ->
- {LCmp, Low, HCmp, High}
- end;
-
-range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) ->
- case range_pos(Low, Arg, High) of
- min ->
- empty;
- low when LCmp == '$gte'; LCmp == '$eq' ->
- {'$eq', Arg, '$eq', Arg};
- low ->
- empty;
- mid ->
- {LCmp, Low, '$lte', Arg};
- high ->
- {LCmp, Low, HCmp, High};
- max ->
- {LCmp, Low, HCmp, High}
- end;
-
-range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) ->
- case range_pos(Low, Arg, High) of
- min ->
- empty;
- low when LCmp == '$gte'; LCmp == '$eq' ->
- {'$eq', Arg, '$eq', Arg};
- low ->
- empty;
- mid ->
- {'$eq', Arg, '$eq', Arg};
- high when HCmp == '$lte'; HCmp == '$eq' ->
- {'$eq', Arg, '$eq', Arg};
- high ->
- empty;
- max ->
- empty
- end;
-
-range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) ->
- case range_pos(Low, Arg, High) of
- min ->
- {LCmp, Low, HCmp, High};
- low ->
- {LCmp, Low, HCmp, High};
- mid ->
- {'$gte', Arg, HCmp, High};
- high when HCmp == '$lte'; HCmp == '$eq' ->
- {'$eq', Arg, '$eq', Arg};
- high ->
- empty;
- max ->
- empty
- end;
-
-range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) ->
- case range_pos(Low, Arg, High) of
- min ->
- {LCmp, Low, HCmp, High};
- low ->
- {'$gt', Arg, HCmp, High};
- mid ->
- {'$gt', Arg, HCmp, High};
- high ->
- empty;
- max ->
- empty
- end;
-
-% There's some other un-indexable restriction on the index
-% that will be applied as a post-filter. Ignore it and
-% carry on our merry way.
-range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) ->
- {LCmp, Low, HCmp, High}.
-
-
-% Returns the value min | low | mid | high | max depending
-% on how Arg compares to Low and High.
-range_pos(Low, Arg, High) ->
- case mango_json:cmp(Arg, Low) of
- N when N < 0 -> min;
- N when N == 0 -> low;
- _ ->
- case mango_json:cmp(Arg, High) of
- X when X < 0 ->
- mid;
- X when X == 0 ->
- high;
- _ ->
- max
- end
- end.
-
-
match({[{<<"$and">>, Args}]}, Value, Cmp) ->
Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end,
lists:all(Pred, Args);