You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by to...@apache.org on 2015/09/10 02:34:15 UTC

[13/26] couchdb-mango git commit: Replace element position with brackets

Replace element position with brackets

To be consistent with view-based indexes, we allow the user
to access array elements by position. We convert each position
to [] for the underlying indexed field. For example, myarray.0.1
would be converted to myarray.[].[]. The converted field is then
used for the text search, and the results are filtered by
the match function.

Bugzid: 46268


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mango/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mango/commit/460fcd47
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mango/tree/460fcd47
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mango/diff/460fcd47

Branch: refs/heads/master
Commit: 460fcd4723a70ba24aee2b0254ba02602260431a
Parents: f481b56
Author: Tony Sun <to...@cloudant.com>
Authored: Wed Aug 26 01:30:27 2015 -0700
Committer: Tony Sun <to...@cloudant.com>
Committed: Wed Aug 26 01:30:27 2015 -0700

----------------------------------------------------------------------
 src/mango_doc.erl           | 47 +++-------------------------------
 src/mango_fields.erl        |  2 +-
 src/mango_selector_text.erl | 54 +++++++++++++++++++++++++++++++++++-----
 src/mango_util.erl          | 47 ++++++++++++++++++++++++++++++++--
 test/06-basic-text-test.py  | 22 ++++++++++++++++
 test/user_docs.py           |  4 ++-
 6 files changed, 122 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/460fcd47/src/mango_doc.erl
----------------------------------------------------------------------
diff --git a/src/mango_doc.erl b/src/mango_doc.erl
index 479a8ad..c22b155 100644
--- a/src/mango_doc.erl
+++ b/src/mango_doc.erl
@@ -22,7 +22,6 @@
 
     get_field/2,
     get_field/3,
-    parse_field/1,
     rem_field/2,
     set_field/3
 ]).
@@ -373,7 +372,7 @@ get_field(Props, Field) ->
 
 
 get_field(Props, Field, Validator) when is_binary(Field) ->
-    {ok, Path} = parse_field(Field),
+    {ok, Path} = mango_util:parse_field(Field),
     get_field(Props, Path, Validator);
 get_field(Props, [], no_validation) ->
     Props;
@@ -411,7 +410,7 @@ get_field(_, [_|_], _) ->
 
 
 rem_field(Props, Field) when is_binary(Field) ->
-    {ok, Path} = parse_field(Field),
+    {ok, Path} = mango_util:parse_field(Field),
     rem_field(Props, Path);
 rem_field({Props}, [Name]) ->
     case lists:keytake(Name, 1, Props) of
@@ -472,7 +471,7 @@ rem_field(_, [_|_]) ->
 
 
 set_field(Props, Field, Value) when is_binary(Field) ->
-    {ok, Path} = parse_field(Field),
+    {ok, Path} = mango_util:parse_field(Field),
     set_field(Props, Path, Value);
 set_field({Props}, [Name], Value) ->
     {lists:keystore(Name, 1, Props, {Name, Value})};
@@ -536,43 +535,3 @@ set_elem(1, [_ | Rest], Value) ->
     [Value | Rest];
 set_elem(I, [Item | Rest], Value) when I > 1 ->
     [Item | set_elem(I-1, Rest, Value)].
-
-parse_field(Field) ->
-    case binary:match(Field, <<"\\">>, []) of
-        nomatch ->
-            % Fast path, no regex required
-            {ok, check_non_empty(Field, binary:split(Field, <<".">>, [global]))};
-        _ ->
-            parse_field_slow(Field)
-    end.
-
-parse_field_slow(Field) ->
-    Path = lists:map(fun
-        (P) when P =:= <<>> ->
-            ?MANGO_ERROR({invalid_field_name, Field});
-        (P) ->
-            re:replace(P, <<"\\\\">>, <<>>, [global, {return, binary}])
-    end, re:split(Field, <<"(?<!\\\\)\\.">>)),
-    {ok, Path}.
-
-check_non_empty(Field, Parts) ->
-    case lists:member(<<>>, Parts) of
-        true ->
-            ?MANGO_ERROR({invalid_field_name, Field});
-        false ->
-            Parts
-    end.
-
--ifdef(TEST).
--include_lib("eunit/include/eunit.hrl").
-
-parse_field_test() ->
-    ?assertEqual({ok, [<<"ab">>]}, parse_field(<<"ab">>)),
-    ?assertEqual({ok, [<<"a">>, <<"b">>]}, parse_field(<<"a.b">>)),
-    ?assertEqual({ok, [<<"a.b">>]}, parse_field(<<"a\\.b">>)),
-    ?assertEqual({ok, [<<"a">>, <<"b">>, <<"c">>]}, parse_field(<<"a.b.c">>)),
-    ?assertEqual({ok, [<<"a">>, <<"b.c">>]}, parse_field(<<"a.b\\.c">>)),
-    Exception = {mango_error, ?MODULE, {invalid_field_name, <<"a..b">>}},
-    ?assertThrow(Exception, parse_field(<<"a..b">>)).
-
--endif.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/460fcd47/src/mango_fields.erl
----------------------------------------------------------------------
diff --git a/src/mango_fields.erl b/src/mango_fields.erl
index 8b6a00b..2732560 100644
--- a/src/mango_fields.erl
+++ b/src/mango_fields.erl
@@ -35,7 +35,7 @@ extract(Doc, all_fields) ->
     Doc;
 extract(Doc, Fields) ->
     lists:foldl(fun(F, NewDoc) ->
-        {ok, Path} = mango_doc:parse_field(F),
+        {ok, Path} = mango_util:parse_field(F),
         case mango_doc:get_field(Doc, Path) of
             not_found ->
                 NewDoc;

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/460fcd47/src/mango_selector_text.erl
----------------------------------------------------------------------
diff --git a/src/mango_selector_text.erl b/src/mango_selector_text.erl
index b7d78e4..e61f759 100644
--- a/src/mango_selector_text.erl
+++ b/src/mango_selector_text.erl
@@ -25,6 +25,12 @@
 -include("mango.hrl").
 
 
+%% Regex matching a literal period. Kept as regex source rather than a
+%% precompiled {re_pattern, ...} tuple: the compiled form is opaque to
+%% callers and its binary layout may differ between Erlang/OTP releases.
+-define(PERIOD, <<"\\.">>).
+
+
 convert(Object) ->
     TupleTree = convert([], Object),
     iolist_to_binary(to_query(TupleTree)).
@@ -159,12 +165,35 @@ convert(Path, {[{<<"$size">>, Arg}]}) ->
 convert(_Path, {[{<<"$", _/binary>>=Op, _}]}) ->
     ?MANGO_ERROR({invalid_operator, Op});
 
-% We've hit a field name specifier. We need to break the name
-% into path parts and continue our conversion.
-convert(Path, {[{Field, Cond}]}) ->
-    NewPathParts = re:split(Field, <<"\\.">>),
-    NewPath = lists:reverse(NewPathParts) ++ Path,
-    convert(NewPath, Cond);
+% We've hit a field name specifier. Check if the field name is accessing
+% arrays. Convert occurrences of element position references to .[]. Then we
+% need to break the name into path parts and continue our conversion.
+convert(Path, {[{Field0, Cond}]}) ->
+    {ok, PP0} = case Field0 of
+        <<>> ->
+            {ok, []};
+        _ ->
+            mango_util:parse_field(Field0)
+    end,
+    % Later on, we perform a lucene_escape_user call on the
+    % final Path, which calls parse_field again. Calling the function
+    % twice converts <<"a\\.b">> to [<<"a">>,<<"b">>]. This leads to
+    % an incorrect query since we need [<<"a.b">>]. To avoid breaking
+    % our escaping mechanism, we simply undo the effect of this first
+    % parse_field call by replacing instances of "." with "\\.".
+    PP1 = [re:replace(P, ?PERIOD, <<"\\\\.">>,
+        [global,{return,binary}]) || P <- PP0],
+    {PP2, HasInteger} = replace_array_indexes(PP1, [], false),
+    NewPath = PP2 ++ Path,
+    case HasInteger of
+        true ->
+            OldPath = lists:reverse(PP1, Path),
+            OldParts = convert(OldPath, Cond),
+            NewParts = convert(NewPath, Cond),
+            {op_or, [OldParts, NewParts]};
+        false ->
+            convert(NewPath, Cond)
+    end;
 
 %% For $in
 convert(Path, Val) when is_binary(Val); is_number(Val); is_boolean(Val) ->
@@ -362,3 +391,16 @@ get_sort_types(Field, {[{_, Cond}]}, Acc)  when is_tuple(Cond)->
 
 get_sort_types(_Field, _, Acc)  ->
     Acc.
+
+
+% Replace path parts that are array positions (unsigned decimal digits)
+% with <<"[]">>. Returns {ReversedParts, HasIndex}: parts come back in
+% reverse order, which is how convert/2 prepends them to Path. Signed
+% strings such as <<"-1">> are not positions and are kept as field names.
+replace_array_indexes([], Acc, HasIndex) ->
+    {Acc, HasIndex};
+replace_array_indexes([Part | Rest], Acc, HasIndex) ->
+    case re:run(Part, <<"^[0-9]+$">>, [dollar_endonly, {capture, none}]) of
+        match -> replace_array_indexes(Rest, [<<"[]">> | Acc], true);
+        nomatch -> replace_array_indexes(Rest, [Part | Acc], HasIndex)
+    end.

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/460fcd47/src/mango_util.erl
----------------------------------------------------------------------
diff --git a/src/mango_util.erl b/src/mango_util.erl
index 0e06f66..aa11310 100644
--- a/src/mango_util.erl
+++ b/src/mango_util.erl
@@ -38,7 +38,9 @@
 
     has_suffix/2,
 
-    join/2
+    join/2,
+
+    parse_field/1
 ]).
 
 
@@ -343,7 +345,7 @@ lucene_escape_qv(<<C, Rest/binary>>) ->
 
 
 lucene_escape_user(Field) ->
-    {ok, Path} = mango_doc:parse_field(Field),
+    {ok, Path} = parse_field(Field),
     Escaped = [mango_util:lucene_escape_field(P) || P <- Path],
     iolist_to_binary(join(".", Escaped)).
 
@@ -377,3 +379,44 @@ is_number_string(Value) when is_list(Value)->
         _ ->
             true
     end.
+
+
+parse_field(Field) ->
+    case binary:match(Field, <<"\\">>, []) of
+        nomatch ->
+            % Fast path, no regex required
+            {ok, check_non_empty(Field, binary:split(Field, <<".">>, [global]))};
+        _ ->
+            parse_field_slow(Field)
+    end.
+
+parse_field_slow(Field) ->
+    Path = lists:map(fun
+        (P) when P =:= <<>> ->
+            ?MANGO_ERROR({invalid_field_name, Field});
+        (P) ->
+            re:replace(P, <<"\\\\">>, <<>>, [global, {return, binary}])
+    end, re:split(Field, <<"(?<!\\\\)\\.">>)),
+    {ok, Path}.
+
+check_non_empty(Field, Parts) ->
+    case lists:member(<<>>, Parts) of
+        true ->
+            ?MANGO_ERROR({invalid_field_name, Field});
+        false ->
+            Parts
+    end.
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+parse_field_test() ->
+    ?assertEqual({ok, [<<"ab">>]}, parse_field(<<"ab">>)),
+    ?assertEqual({ok, [<<"a">>, <<"b">>]}, parse_field(<<"a.b">>)),
+    ?assertEqual({ok, [<<"a.b">>]}, parse_field(<<"a\\.b">>)),
+    ?assertEqual({ok, [<<"a">>, <<"b">>, <<"c">>]}, parse_field(<<"a.b.c">>)),
+    ?assertEqual({ok, [<<"a">>, <<"b.c">>]}, parse_field(<<"a.b\\.c">>)),
+    Exception = {mango_error, ?MODULE, {invalid_field_name, <<"a..b">>}},
+    ?assertThrow(Exception, parse_field(<<"a..b">>)).
+
+-endif.

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/460fcd47/test/06-basic-text-test.py
----------------------------------------------------------------------
diff --git a/test/06-basic-text-test.py b/test/06-basic-text-test.py
index 28538ad..d44e340 100644
--- a/test/06-basic-text-test.py
+++ b/test/06-basic-text-test.py
@@ -59,6 +59,28 @@ class BasicTextTests(mango.UserDocsTextTests):
         assert docs[0]["name"]["first"] == "Stephanie"
         assert docs[0]["favorites"] == faves
 
+    def test_array_ref(self):
+        docs = self.db.find({"favorites.1": "Python"})
+        assert len(docs) == 4
+        for d in docs:
+            assert "Python" in d["favorites"]
+
+        # Nested Level
+        docs = self.db.find({"favorites.0.2": "Python"})
+        print len(docs)
+        assert len(docs) == 1
+        for d in docs:
+            assert "Python" in d["favorites"][0][2]
+
+    def test_number_ref(self):
+        docs = self.db.find({"11111": "number_field"})
+        assert len(docs) == 1
+        assert docs[0]["11111"] == "number_field"
+
+        docs = self.db.find({"22222.33333": "nested_number_field"})
+        assert len(docs) == 1
+        assert docs[0]["22222"]["33333"] == "nested_number_field"
+
     def test_lt(self):
         docs = self.db.find({"age": {"$lt": 22}})
         assert len(docs) == 0

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/460fcd47/test/user_docs.py
----------------------------------------------------------------------
diff --git a/test/user_docs.py b/test/user_docs.py
index baf83f7..e2f1705 100644
--- a/test/user_docs.py
+++ b/test/user_docs.py
@@ -197,7 +197,9 @@ DOCS = [
             "Erlang",
             "C",
             "Erlang"
-        ]
+        ],
+        "11111": "number_field",
+        "22222": {"33333" : "nested_number_field"}
     },
     {
         "_id": "8e1c90c0-ac18-4832-8081-40d14325bde0",