You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by to...@apache.org on 2017/04/05 07:31:13 UTC

[couchdb] 01/01: Use efficient set storage for field names

This is an automated email from the ASF dual-hosted git repository.

tonysun83 pushed a commit to branch 3358-use-efficient-set
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 21cb0eb98bbf592a24456637429e5df6ce99ffc6
Author: Tony Sun <to...@cloudant.com>
AuthorDate: Wed Apr 5 00:02:57 2017 -0700

    Use efficient set storage for field names
    
    When indexing a set of fields for text search, we also create a special
    field called $fieldnames. It contains the names of all the fields that
    need to be indexed. In order to do that, we need a unique list of the
    form [[<<"$fieldnames">>, Name, []] | Rest]. The old code would check
    for membership via lists:member/2 before adding each element to the
    list. This is inefficient, because every check scans the whole list.
    Some documents can contain a large number of fields, so we will use
    gb_sets to create a unique set of fields, and then extract out the
    field names.
    
    COUCHDB-3358
---
 src/mango/src/mango_native_proc.erl | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl
index 6d0fb24..f36d6f7 100644
--- a/src/mango/src/mango_native_proc.erl
+++ b/src/mango/src/mango_native_proc.erl
@@ -175,7 +175,7 @@ get_text_entries0(IdxProps, Doc) ->
     Fields = if not DefaultEnabled -> Fields0; true ->
         add_default_text_field(Fields0)
     end,
-    FieldNames = get_field_names(Fields, []),
+    FieldNames = get_field_names(Fields),
     Converted = convert_text_fields(Fields),
     FieldNames ++ Converted.
 
@@ -257,15 +257,10 @@ add_default_text_field([_ | Rest], Acc) ->
 
 
 %% index of all field names
-get_field_names([], FAcc) ->
-    FAcc;
-get_field_names([{Name, _Type, _Value} | Rest], FAcc) ->
-    case lists:member([<<"$fieldnames">>, Name, []], FAcc) of
-        true ->
-            get_field_names(Rest, FAcc);
-        false ->
-            get_field_names(Rest, [[<<"$fieldnames">>, Name, []] | FAcc])
-    end.
+get_field_names(Fields) ->
+    %% Build the set from names only: unique per Name, not per {Name, Type, Value}.
+    UNames = gb_sets:to_list(gb_sets:from_list([N || {N, _Type, _Value} <- Fields])),
+    [[<<"$fieldnames">>, Name, []] || Name <- UNames].
 
 
 convert_text_fields([]) ->

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.