You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@couchdb.apache.org by GitBox <gi...@apache.org> on 2022/12/05 21:34:42 UTC

[GitHub] [couchdb] rnewson opened a new pull request, #4291: Import nouveau

rnewson opened a new pull request, #4291:
URL: https://github.com/apache/couchdb/pull/4291

   This PR imports both halves of the "nouveau" project I've been working on as a possible replacement for dreyfus/clouseau, the current approach to supplementing CouchDB's indexing feature (map/reduce) with a Lucene-based option.
   
   I've squashed the history of both projects as I don't think it's very interesting, happy to discuss this point.
   
   I'm marking this as a draft PR not because I don't think the code is not worth merging (I believe the basics of indexing and querying with nouveau work pretty well, and I've integrated with mango as well), but because the project needs to discuss how this should land. Perhaps I need to change how a nouveau index is defined, perhaps I have to complete all the usual chores (api docs, extensive tests) in this PR, and so on. It largely depends on whether 'main' is releasable at all times or whether experimental features can live there for a while as they mature.
   
   Currently, nouveau indexes are stored under the "nouveau" key of a design document, at the same level as "indexes" for clouseau and "views" for map/reduce. Instead of this I have suggested that nouveau indexes are defined under "indexes" as they are for clouseau but a new "version" field is required (with value 2) to select the nouveau engine instead. Thoughts on this are extremely welcome.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173593062


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),
+    QueryArgs1 = maps:remove(bookmark, QueryArgs0),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = maps:get(limit, QueryArgs0),
+        sort = maps:get(sort, QueryArgs0),
+        counters = Counters,
+        search_results = #{}
+    },
+    try
+        rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, State, infinity, 1000 * 60 * 60)

Review Comment:
   done, made them configurable too (recently did this for dreyfus)



##########
src/nouveau/src/nouveau_httpd.erl:
##########
@@ -0,0 +1,276 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd).
+
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    handle_analyze_req/1,
+    handle_search_req/3,
+    handle_info_req/3,
+    handle_cleanup_req/2
+]).
+
+-import(chttpd, [
+    send_method_not_allowed/2,
+    send_json/2, send_json/3,
+    send_error/2
+]).
+
+-define(RETRY_LIMIT, 20).
+-define(RETRY_SLEEP, 500).
+
+handle_analyze_req(#httpd{method = 'POST'} = Req) ->

Review Comment:
   sure



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170863006


##########
src/nouveau/src/nouveau_util.erl:
##########
@@ -0,0 +1,97 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_util).
+
+-include("nouveau.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    index_name/1,
+    design_doc_to_indexes/2,
+    design_doc_to_index/3,
+    nouveau_url/0
+]).
+
+index_name(Path) when is_binary(Path) ->
+    <<(node_prefix())/binary, "/", Path/binary>>;
+index_name(#index{} = Index) ->
+    <<(node_prefix())/binary, "/", (Index#index.dbname)/binary, "/", (Index#index.sig)/binary>>.
+
+node_prefix() ->
+    atom_to_binary(node(), utf8).
+
+%% copied from dreyfus_index.erl
+design_doc_to_indexes(DbName, #doc{body = {Fields}} = Doc) ->
+    RawIndexes = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    case RawIndexes of
+        {IndexList} when is_list(IndexList) ->
+            {IndexNames, _} = lists:unzip(IndexList),
+            lists:flatmap(
+                fun(IndexName) ->
+                    case (catch design_doc_to_index(DbName, Doc, IndexName)) of
+                        {ok, #index{} = Index} -> [Index];
+                        _ -> []
+                    end
+                end,
+                IndexNames
+            );
+        _ ->
+            []
+    end.
+
+%% copied from dreyfus_index.erl
+design_doc_to_index(DbName, #doc{id = Id, body = {Fields}}, IndexName) ->
+    Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>),
+    {RawIndexes} = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    InvalidDDocError =
+        {invalid_design_doc, <<"index `", IndexName/binary, "` must have parameter `index`">>},
+    case lists:keyfind(IndexName, 1, RawIndexes) of
+        false ->
+            {error, {not_found, <<IndexName/binary, " not found.">>}};
+        {IndexName, {Index}} ->
+            DefaultAnalyzer = couch_util:get_value(<<"default_analyzer">>, Index, <<"standard">>),
+            FieldAnalyzers = couch_util:get_value(<<"field_analyzers">>, Index, #{}),
+            case couch_util:get_value(<<"index">>, Index) of
+                undefined ->
+                    {error, InvalidDDocError};
+                Def ->
+                    Sig = ?l2b(
+                        couch_util:to_hex(
+                            crypto:hash(
+                                sha256,
+                                term_to_binary(
+                                    {DefaultAnalyzer, FieldAnalyzers, Def}
+                                )
+                            )

Review Comment:
   Minor nit: sig hash calculation may look nicer in a separate small function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] iilyak commented on pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
iilyak commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1362991997

   JFYI 
   In case if there is an intend to run two backends at the same time.
   Instead of relying on `config:get_boolean("nouveau", "enable", false).` it should be possible to enable/disable nouveau on per databases basis using feature flags. The feature flags are compiled down into an erlang module and therefore very efficient (if performance is the concern).
   
   At the moment the feature flags are used for partition queries. However they can be used here as well. In order to make them work the `subject` need to be passed to the check function. Currently supported subjects are defined [here](https://github.com/apache/couchdb/blob/main/src/couch/src/couch_flags.erl#L109:L122). The check for a feature flag look like `couch_flags:is_enabled(nouveau, Subject)`. In order to make it work for `#idx{}` and `#index{}` records we would need to extract the database name.
   
   ```erlang
   is_nouveau(#index{dbname = Subject}) ->
        couch_flags:is_enabled(nouveau, Subject);
   is_nouveau(#idx{dbname = Subject}) ->
        couch_flags:is_enabled(nouveau, Subject).
   ```
   
   The configuration for feature flag is done in ini files as follows.
   
   ```ini
   [feature_flags]
   ; This enables nouveau for databases with given prefix. 
   nouveau||my_database* = true
   ```
   
   Supporting another type of a subject also supported via EPI. 
   
   ```erlang
   -module(nouveue_plugin_feature_flags).
   
   -export([
       rules/0,
       subject_key/1
   ]).
   
   rules() ->
       [].
   
   subject_key(#idx{dbname = Subject}) ->
      Subject;
   subject_key(#index{dbname = Subject}) ->
      Subject;    
   subject_key(_) ->
       no_decision.
   ```
   
   ```erlang
   -module(nouveau_epi).
   
   -behaviour(couch_epi_plugin).
   
   ....
   providers() ->
       [
            {chttpd_handlers, nouveau_httpd_handlers},
            {feature_flags, nouveau_plugin_feature_flags}
       ].
   
   ```
   
   The only two places in the PR which are hard to do are in `nouveau_api.erl` since in these case we don't have access to database name.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1134533232


##########
java/nouveau/server/README.md:
##########
@@ -0,0 +1,107 @@
+# nouveau
+Lucene 9 + DropWizard = Maybe a good search option for Apache CouchDB?
+
+Nouveau is an experimental search extension for CouchDB 3.x.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers
+* classic lucene query syntax
+* count and range facets
+* cursor support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* update=false
+* support for stale=ok
+* integration with mango
+
+## What doesn't work yet?
+
+* include_docs=true
+* No support for results grouping
+* No support to configure stop words for analyzers
+
+## Why is this better than dreyfus/clouseau?
+
+* No scalang (or Scala!)
+* Supports any version of Java that Lucene 9 supports
+* memory-mapped I/O for performance
+* direct I/O used for segment merging (so we don't evict useful data from disk cache)
+* It's new and shiny.
+
+## Erlang side
+
+You'll need to run a fork of couchdb: https://github.com/rnewson/couchdb-nouveau
+
+## Getting started
+
+Build Nouveau with;
+
+`mvn package`
+
+Run Nouvea with;
+
+`java -jar target/nouveau-*.jar server nouveau.yaml`
+
+Now run CouchDB using the 'nouveau' branch of my fork at https://github.com/rnewson/couchdb-nouveau;

Review Comment:
   ```suggestion
   ## Getting started
   
   Build and run Nouveau with;
   
   `make nouveau-start`
   
   Now run CouchDB;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson merged pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson merged PR #4291:
URL: https://github.com/apache/couchdb/pull/4291


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170942648


##########
src/nouveau/src/nouveau_util.erl:
##########
@@ -0,0 +1,97 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_util).
+
+-include("nouveau.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    index_name/1,
+    design_doc_to_indexes/2,
+    design_doc_to_index/3,
+    nouveau_url/0
+]).
+
+index_name(Path) when is_binary(Path) ->
+    <<(node_prefix())/binary, "/", Path/binary>>;
+index_name(#index{} = Index) ->
+    <<(node_prefix())/binary, "/", (Index#index.dbname)/binary, "/", (Index#index.sig)/binary>>.
+
+node_prefix() ->
+    atom_to_binary(node(), utf8).
+
+%% copied from dreyfus_index.erl
+design_doc_to_indexes(DbName, #doc{body = {Fields}} = Doc) ->
+    RawIndexes = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    case RawIndexes of
+        {IndexList} when is_list(IndexList) ->
+            {IndexNames, _} = lists:unzip(IndexList),
+            lists:flatmap(
+                fun(IndexName) ->
+                    case (catch design_doc_to_index(DbName, Doc, IndexName)) of
+                        {ok, #index{} = Index} -> [Index];
+                        _ -> []
+                    end
+                end,
+                IndexNames
+            );
+        _ ->
+            []
+    end.
+
+%% copied from dreyfus_index.erl
+design_doc_to_index(DbName, #doc{id = Id, body = {Fields}}, IndexName) ->
+    Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>),
+    {RawIndexes} = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    InvalidDDocError =
+        {invalid_design_doc, <<"index `", IndexName/binary, "` must have parameter `index`">>},
+    case lists:keyfind(IndexName, 1, RawIndexes) of
+        false ->
+            {error, {not_found, <<IndexName/binary, " not found.">>}};
+        {IndexName, {Index}} ->
+            DefaultAnalyzer = couch_util:get_value(<<"default_analyzer">>, Index, <<"standard">>),
+            FieldAnalyzers = couch_util:get_value(<<"field_analyzers">>, Index, #{}),
+            case couch_util:get_value(<<"index">>, Index) of
+                undefined ->
+                    {error, InvalidDDocError};
+                Def ->
+                    Sig = ?l2b(
+                        couch_util:to_hex(
+                            crypto:hash(
+                                sha256,
+                                term_to_binary(
+                                    {DefaultAnalyzer, FieldAnalyzers, Def}
+                                )
+                            )

Review Comment:
   Should we us the [default configured](https://github.com/apache/couchdb/blob/df52be5778534c2c11e18b6ed2f7bdf242012184/rel/overlay/etc/default.ini#L332) `hash_algorithm` here, like we do in [cookie ](https://github.com/apache/couchdb/blob/df52be5778534c2c11e18b6ed2f7bdf242012184/src/couch/src/couch_httpd_auth.erl#L349) and [proxy ](https://github.com/apache/couchdb/blob/df52be5778534c2c11e18b6ed2f7bdf242012184/src/couch/src/couch_httpd_auth.erl#L204) auth?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170865112


##########
src/nouveau/src/nouveau_httpd_handlers.erl:
##########
@@ -0,0 +1,35 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd_handlers).
+
+-export([url_handler/1, db_handler/1, design_handler/1]).
+
+url_handler(<<"_nouveau_analyze">>) ->
+    fun nouveau_httpd:handle_analyze_req/1;
+url_handler(_) ->
+    no_match.
+
+db_handler(<<"_nouveau_cleanup">>) ->

Review Comment:
   It may be tricky having a full test here and asserting cleanup happened but at least a basic call and check for a success return code should work.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129229857


##########
java/nouveau/base/src/test/resources/fixtures/SearchRequest.json:
##########
@@ -0,0 +1,17 @@
+{
+    "query": "*:*",
+    "limit": 10,
+    "sort": null,
+    "counts": [
+        "bar"
+    ],
+    "ranges": {
+        "foo": [
+            {
+                "label": "0 to 100 inc",
+                "min": 0.0,
+                "max": 100.0
+            }
+        ]
+    }
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] janl commented on pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
janl commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1342892353

   @rnewson thanks for the view definition side of things. How would the query URLs look for this? And how do things look like in mango?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170788010


##########
nouveau/README.md:
##########
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on;
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json.yaml}
+
+## What doesn't work yet?
+
+* results grouping
+* configurable stop words for analyzers
+* Makefile.win or Windows generally
+
+I don't intend to add grouping support, it seems barely used. Would accept a tidy contribution, though.
+
+## Why is this better than dreyfus/clouseau?
+
+* No scalang (or Scala!)
+* Supports any version of Java that Lucene 9 supports
+* memory-mapped I/O for performance (which works best on Java 19)
+* direct I/O used for segment merging (so we don't evict useful data from disk cache)
+
+## Getting started
+
+Configure CouchDB with `--enable-nouveau'
+
+Build Nouveau with;
+
+`make`
+
+Run Nouvea with;
+
+`dev/run --admin=foo:bar --with-nouveau`
+
+Make a database with some data and an index definition;
+
+```
+#!/bin/sh
+
+URL="http://foo:bar@127.0.0.1:15984/foo"
+
+curl -X DELETE "$URL"
+curl -X PUT "$URL?n=3&q=16"
+
+curl -X PUT "$URL/_design/foo" -d '{"nouveau":{"bar":{"default_analyzer":"standard", "field_analyzers":{"foo":"english"}, "index":"function(doc) { index(\"string\", \"foo\", \"bar\"); }"}}}'
+
+# curl "$URL/_index" -Hcontent-type:application/json -d '{"type":"nouveau", "index": {"fields": [{"name": "bar", "type":"number"}]}}'
+
+for I in {1..5}; do
+    DOCID=$RANDOM
+    DOCID=$[ $DOCID % 100000 ]
+    BAR=$RANDOM
+    BAR=$[ $BAR % 100000 ]
+    curl -X PUT "$URL/doc$DOCID" -d "{\"bar\": $BAR}"
+done
+
+while true; do
+    curl 'foo:bar@localhost:15984/foo/_design/foo/_nouveau/bar?q=*:*'
+done
+```
+
+In order not to collide with `dreyfus` I've hooked Nouveau in with new paths;
+
+`curl 'foo:bar@localhost:15984/foo/_design/foo/_nouveau/bar?q=*:*'`
+
+This will cause Nouveau to build indexes for each copy (N) and each
+shard range (Q) and then perform a search and return the results. Lots
+of query syntax is working as is sorting on strings and numbers
+(`sort=["fieldnamehere&lt;string&gt;"] or sort=["fieldnamehere&lt;number&gt;"],

Review Comment:
   Closing back-tick quote seems to be missing?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170843321


##########
src/nouveau/src/nouveau_bookmark.erl:
##########
@@ -0,0 +1,68 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_bookmark).
+
+-include_lib("mem3/include/mem3.hrl").
+
+-export([new/0, update/3, unpack/2, pack/1, to_ejson/1]).
+
+new() ->
+    #{}.
+
+%% Form a bookmark from the last contribution from each shard range
+update(DbName, PreviousBookmark, SearchResults) when is_binary(PreviousBookmark) ->
+    update(DbName, unpack(DbName, PreviousBookmark), SearchResults);
+update(DbName, {EJson}, SearchResults) when is_list(EJson) ->
+    update(DbName, from_ejson({EJson}), SearchResults);
+update(DbName, PreviousBookmark, SearchResults) when is_map(PreviousBookmark) ->
+    Hits = maps:get(<<"hits">>, SearchResults),

Review Comment:
   `#{<<"hits">> := Hits} = SearchResults` is a bit more common.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170809637


##########
rel/overlay/etc/default.ini:
##########
@@ -802,6 +802,7 @@ state_dir = {{state_dir}}
 ; The name and location of the Clouseau Java service required to
 ; enable Search functionality.
 ;name = clouseau@127.0.0.1
+name={{clouseau_name}}

Review Comment:
   Add a space before and after `=` for consistency.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170861753


##########
src/nouveau/src/nouveau_util.erl:
##########
@@ -0,0 +1,97 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_util).
+
+-include("nouveau.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    index_name/1,
+    design_doc_to_indexes/2,
+    design_doc_to_index/3,
+    nouveau_url/0
+]).
+
+index_name(Path) when is_binary(Path) ->
+    <<(node_prefix())/binary, "/", Path/binary>>;
+index_name(#index{} = Index) ->
+    <<(node_prefix())/binary, "/", (Index#index.dbname)/binary, "/", (Index#index.sig)/binary>>.
+
+node_prefix() ->
+    atom_to_binary(node(), utf8).

Review Comment:
   Any danger here in case the node is started in non-distributed mode (without a -name ...)? The prefix would then be `nonode@nohost`?
   
   I think we may be ok since if distribution crashes the node would crash as well (or restart)...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173593844


##########
src/nouveau/src/nouveau_util.erl:
##########
@@ -0,0 +1,97 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_util).
+
+-include("nouveau.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    index_name/1,
+    design_doc_to_indexes/2,
+    design_doc_to_index/3,
+    nouveau_url/0
+]).
+
+index_name(Path) when is_binary(Path) ->
+    <<(node_prefix())/binary, "/", Path/binary>>;
+index_name(#index{} = Index) ->
+    <<(node_prefix())/binary, "/", (Index#index.dbname)/binary, "/", (Index#index.sig)/binary>>.
+
+node_prefix() ->
+    atom_to_binary(node(), utf8).

Review Comment:
   that's a fine prefix. the purpose is to allow a nouveau server to host indexes for multiple nodes. in the nonode@ situation it just means there's one node, its name doesn't really matter.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
big-r81 commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1340924188

   Are the GH workflows working/necessary, if they are not in the root directory after the merge?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170866194


##########
src/nouveau/src/nouveau_httpd_handlers.erl:
##########
@@ -0,0 +1,35 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd_handlers).
+
+-export([url_handler/1, db_handler/1, design_handler/1]).
+
+url_handler(<<"_nouveau_analyze">>) ->

Review Comment:
   We should have an integration test calling this API. At least asserting it won't crash.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170851808


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),
+    QueryArgs1 = maps:remove(bookmark, QueryArgs0),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = maps:get(limit, QueryArgs0),

Review Comment:
   A minor nit: It's a bit odd to use a `QueryArgs0` after we've updated it to `QueryArgs1`. May be better to extract `Limit` and `Sort` initially and keep them as separate variables with `#{limit = Limit, sort = Sort} = QueryArgs0` somewhere before we updated to `QueryArgs1`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173595362


##########
src/nouveau/src/nouveau_util.erl:
##########
@@ -0,0 +1,97 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_util).
+
+-include("nouveau.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    index_name/1,
+    design_doc_to_indexes/2,
+    design_doc_to_index/3,
+    nouveau_url/0
+]).
+
+index_name(Path) when is_binary(Path) ->
+    <<(node_prefix())/binary, "/", Path/binary>>;
+index_name(#index{} = Index) ->
+    <<(node_prefix())/binary, "/", (Index#index.dbname)/binary, "/", (Index#index.sig)/binary>>.
+
+node_prefix() ->
+    atom_to_binary(node(), utf8).
+
+%% copied from dreyfus_index.erl
+design_doc_to_indexes(DbName, #doc{body = {Fields}} = Doc) ->
+    RawIndexes = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    case RawIndexes of
+        {IndexList} when is_list(IndexList) ->
+            {IndexNames, _} = lists:unzip(IndexList),
+            lists:flatmap(
+                fun(IndexName) ->
+                    case (catch design_doc_to_index(DbName, Doc, IndexName)) of
+                        {ok, #index{} = Index} -> [Index];
+                        _ -> []
+                    end
+                end,
+                IndexNames
+            );
+        _ ->
+            []
+    end.
+
+%% copied from dreyfus_index.erl
+design_doc_to_index(DbName, #doc{id = Id, body = {Fields}}, IndexName) ->
+    Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>),
+    {RawIndexes} = couch_util:get_value(<<"nouveau">>, Fields, {[]}),
+    InvalidDDocError =
+        {invalid_design_doc, <<"index `", IndexName/binary, "` must have parameter `index`">>},
+    case lists:keyfind(IndexName, 1, RawIndexes) of
+        false ->
+            {error, {not_found, <<IndexName/binary, " not found.">>}};
+        {IndexName, {Index}} ->
+            DefaultAnalyzer = couch_util:get_value(<<"default_analyzer">>, Index, <<"standard">>),
+            FieldAnalyzers = couch_util:get_value(<<"field_analyzers">>, Index, #{}),
+            case couch_util:get_value(<<"index">>, Index) of
+                undefined ->
+                    {error, InvalidDDocError};
+                Def ->
+                    Sig = ?l2b(
+                        couch_util:to_hex(
+                            crypto:hash(
+                                sha256,
+                                term_to_binary(
+                                    {DefaultAnalyzer, FieldAnalyzers, Def}
+                                )
+                            )

Review Comment:
   that controls the HMAC hash for security reasons. we use a crypto hash here for non-cryptographic reasons (https://docs.couchdb.org/en/stable/best-practices/views.html#deploying-a-view-change-in-a-live-environment). sha256 is plenty for that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129233307


##########
java/nouveau/lucene4/src/main/java/org/apache/couchdb/nouveau/lucene4/resources/IndexResource.java:
##########
@@ -0,0 +1,138 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.lucene4.resources;
+
+import java.io.IOException;
+import java.util.Map;
+
+import javax.validation.Valid;
+import javax.validation.constraints.NotNull;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+
+import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
+import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.apache.couchdb.nouveau.api.IndexInfo;
+import org.apache.couchdb.nouveau.api.SearchRequest;
+import org.apache.couchdb.nouveau.api.SearchResults;
+import org.apache.couchdb.nouveau.core.IndexLoader;
+import org.apache.couchdb.nouveau.core.IndexManager;
+import org.apache.couchdb.nouveau.lucene4.core.Lucene4AnalyzerFactory;
+import org.apache.couchdb.nouveau.lucene4.core.Lucene4Index;
+import org.apache.couchdb.nouveau.lucene4.core.Utils;
+import org.apache.couchdb.nouveau.resources.BaseIndexResource;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+import com.codahale.metrics.annotation.ExceptionMetered;
+import com.codahale.metrics.annotation.Metered;
+import com.codahale.metrics.annotation.ResponseMetered;;
+
+@Path("/4/index/{name}")
+@Metered
+@ResponseMetered
+@ExceptionMetered(cause = IOException.class)
+@Consumes(MediaType.APPLICATION_JSON)
+@Produces(MediaType.APPLICATION_JSON)
+public class IndexResource extends BaseIndexResource<IndexableField> {
+
+    private final SearcherFactory searcherFactory;
+
+    public IndexResource(final IndexManager indexManager, final SearcherFactory searcherFactory) {
+        super(indexManager);
+        this.searcherFactory = searcherFactory;
+    }
+
+    @PUT
+    @Override
+    public void createIndex(@PathParam("name") String name, @NotNull @Valid IndexDefinition indexDefinition)
+            throws IOException {
+        super.createIndex(name, indexDefinition);
+    }
+
+    @DELETE
+    @Path("/doc/{docId}")
+    @Override
+    public void deleteDoc(@PathParam("name") String name, @PathParam("docId") String docId,
+            @NotNull @Valid DocumentDeleteRequest request) throws Exception {
+        super.deleteDoc(name, docId, request);
+    }
+
+    @DELETE
+    @Override
+    public void deletePath(@PathParam("name") String path) throws IOException {
+        super.deletePath(path);
+    }
+
+    @GET
+    @Override
+    public IndexInfo indexInfo(@PathParam("name") String name) throws Exception {
+        return super.indexInfo(name);
+    }
+
+    @POST
+    @Path("/search")
+    @Override
+    public SearchResults<IndexableField> searchIndex(@PathParam("name") String name,
+            @NotNull @Valid SearchRequest request)
+            throws Exception {
+        return super.searchIndex(name, request);
+    }
+
+    @PUT
+    @Path("/doc/{docId}")
+    @Override
+    public void updateDoc(@PathParam("name") String name, @PathParam("docId") String docId,
+            @NotNull @Valid DocumentUpdateRequest<IndexableField> request)
+            throws Exception {
+        super.updateDoc(name, docId, request);
+    }
+
+    @Override
+    protected IndexLoader<IndexableField> indexLoader() {
+        return (path, indexDefinition) -> {
+            final Analyzer analyzer = Lucene4AnalyzerFactory.fromDefinition(indexDefinition);
+            final Directory dir = FSDirectory.open(path.toFile());
+            final IndexWriterConfig config = new IndexWriterConfig(Utils.LUCENE_VERSION, analyzer);
+            config.setUseCompoundFile(false);
+            final IndexWriter writer = new IndexWriter(dir, config);
+            final long updateSeq = getUpdateSeq(writer);
+            final SearcherManager searcherManager = new SearcherManager(writer, true, searcherFactory);
+            return new Lucene4Index(analyzer, writer, updateSeq, searcherManager);
+        };
+    }
+
+    private static long getUpdateSeq(final IndexWriter writer) throws IOException {
+        final Map<String, String> commitData = writer.getCommitData();
+        if (commitData == null) {
+            return 0L;
+        }
+        return Long.parseLong(commitData.getOrDefault("update_seq", "0"));
+    }
+
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129235690


##########
java/nouveau/lucene9/src/main/java/org/apache/couchdb/nouveau/lucene9/resources/IndexResource.java:
##########
@@ -0,0 +1,143 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.lucene9.resources;
+
+import java.io.IOException;
+import java.util.Map;
+
+import javax.validation.Valid;
+import javax.validation.constraints.NotNull;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+
+import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
+import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.apache.couchdb.nouveau.api.IndexInfo;
+import org.apache.couchdb.nouveau.api.SearchRequest;
+import org.apache.couchdb.nouveau.api.SearchResults;
+import org.apache.couchdb.nouveau.core.IndexLoader;
+import org.apache.couchdb.nouveau.core.IndexManager;
+import org.apache.couchdb.nouveau.lucene9.core.Lucene9AnalyzerFactory;
+import org.apache.couchdb.nouveau.lucene9.core.Lucene9Index;
+import org.apache.couchdb.nouveau.resources.BaseIndexResource;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.misc.store.DirectIODirectory;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+import com.codahale.metrics.annotation.ExceptionMetered;
+import com.codahale.metrics.annotation.Metered;
+import com.codahale.metrics.annotation.ResponseMetered;;
+
+@Path("/9/index/{name}")
+@Metered
+@ResponseMetered
+@ExceptionMetered(cause = IOException.class)
+@Consumes(MediaType.APPLICATION_JSON)
+@Produces(MediaType.APPLICATION_JSON)
+public class IndexResource extends BaseIndexResource<IndexableField> {
+
+    private final SearcherFactory searcherFactory;
+
+    public IndexResource(final IndexManager indexManager, final SearcherFactory searcherFactory) {
+        super(indexManager);
+        this.searcherFactory = searcherFactory;
+    }
+
+    @PUT
+    @Override
+    public void createIndex(@PathParam("name") String name, @NotNull @Valid IndexDefinition indexDefinition)
+            throws IOException {
+        super.createIndex(name, indexDefinition);
+    }
+
+    @DELETE
+    @Path("/doc/{docId}")
+    @Override
+    public void deleteDoc(@PathParam("name") String name, @PathParam("docId") String docId,
+            @NotNull @Valid DocumentDeleteRequest request) throws Exception {
+        super.deleteDoc(name, docId, request);
+    }
+
+    @DELETE
+    @Override
+    public void deletePath(@PathParam("name") String path) throws IOException {
+        super.deletePath(path);
+    }
+
+    @GET
+    @Override
+    public IndexInfo indexInfo(@PathParam("name") String name) throws Exception {
+        return super.indexInfo(name);
+    }
+
+    @POST
+    @Path("/search")
+    @Override
+    public SearchResults<IndexableField> searchIndex(@PathParam("name") String name,
+            @NotNull @Valid SearchRequest request)
+            throws Exception {
+        return super.searchIndex(name, request);
+    }
+
+    @PUT
+    @Path("/doc/{docId}")
+    @Override
+    public void updateDoc(@PathParam("name") String name, @PathParam("docId") String docId,
+            @NotNull @Valid DocumentUpdateRequest<IndexableField> request)
+            throws Exception {
+        super.updateDoc(name, docId, request);
+    }
+
+    @Override
+    protected IndexLoader<IndexableField> indexLoader() {
+        return (path, indexDefinition) -> {
+            final Analyzer analyzer = Lucene9AnalyzerFactory.fromDefinition(indexDefinition);
+            final Directory dir = new DirectIODirectory(FSDirectory.open(path));
+            final IndexWriterConfig config = new IndexWriterConfig(analyzer);
+            config.setUseCompoundFile(false);
+            final IndexWriter writer = new IndexWriter(dir, config);
+            final long updateSeq = getUpdateSeq(writer);
+            final SearcherManager searcherManager = new SearcherManager(writer, searcherFactory);
+            return new Lucene9Index(analyzer, writer, updateSeq, searcherManager);
+        };
+    }
+
+    private static long getUpdateSeq(final IndexWriter writer) throws IOException {
+        final Iterable<Map.Entry<String, String>> commitData = writer.getLiveCommitData();
+        if (commitData == null) {
+            return 0L;
+        }
+        for (Map.Entry<String, String> entry : commitData) {
+            if (entry.getKey().equals("update_seq")) {
+                return Long.parseLong(entry.getValue());
+            }
+        }
+        return 0L;
+    }
+
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129235190


##########
java/nouveau/lucene9/src/main/java/org/apache/couchdb/nouveau/lucene9/core/QueryDeserializer.java:
##########
@@ -0,0 +1,121 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.lucene9.core;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+
+public class QueryDeserializer extends StdDeserializer<Query> {
+
+    public QueryDeserializer() {
+        this(null);
+    }
+
+    public QueryDeserializer(Class<?> vc) {
+        super(vc);
+    }
+
+    @Override
+    public Query deserialize(final JsonParser parser, final DeserializationContext context)
+            throws IOException, JsonProcessingException {
+        return deserializeNode(parser, context, parser.getCodec().readTree(parser));
+    }
+
+    private Query deserializeNode(final JsonParser parser, final DeserializationContext context, final JsonNode node)
+            throws IOException, JsonProcessingException {
+        final String type = node.get("@type").asText();
+        switch (type) {
+            case "term": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new TermQuery(new Term(field, text));
+            }
+            case "boolean": {
+                if (!node.get("clauses").isArray()) {
+                    throw new JsonParseException(parser, "boolean clauses must be an array");
+                }
+                final BooleanQuery.Builder builder = new BooleanQuery.Builder();
+                final Iterator<JsonNode> it = node.get("clauses").elements();
+                while (it.hasNext()) {
+                    final Query q = deserializeNode(parser, context, it.next());
+                    builder.add(q, null);
+                }
+                return builder.build();
+            }
+            case "wildcard": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new WildcardQuery(new Term(field, text));
+            }
+            case "phrase": {
+                final String field = node.get("field").asText();
+                if (!node.get("terms").isArray()) {
+                    throw new JsonParseException(parser, "phrase terms must be an array");
+                }
+                final PhraseQuery.Builder builder = new PhraseQuery.Builder();
+                final Iterator<JsonNode> it = node.get("terms").elements();
+                while (it.hasNext()) {
+                    builder.add(new Term(field, it.next().asText()));
+                }
+                builder.setSlop(node.get("slop").asInt());
+                return builder.build();
+            }
+            case "prefix": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new PrefixQuery(new Term(field, text));
+            }
+            case "fuzzy": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                final int maxEdits = node.get("max_edits").asInt();
+                final int prefixLength = node.get("prefix_length").asInt();
+                return new FuzzyQuery(new Term(field, text), maxEdits, prefixLength);
+            }
+            case "regexp": {
+                final String field = node.get("field").asText();
+                final String text = node.get("text").asText();
+                return new RegexpQuery(new Term(field, text));
+            }
+            case "term_range": {
+
+            }
+            case "point_range": {
+
+            }
+            case "match_all":
+            return new MatchAllDocsQuery();
+        }
+        throw new JsonParseException(parser, type + " not a supported query type");
+    }
+
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170793651


##########
nouveau/src/main/java/org/apache/couchdb/nouveau/api/DoubleRange.java:
##########
@@ -0,0 +1,31 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+

Review Comment:
   Extra two empty lines?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173551270


##########
src/nouveau/src/nouveau_bookmark.erl:
##########
@@ -0,0 +1,68 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_bookmark).
+
+-include_lib("mem3/include/mem3.hrl").
+
+-export([new/0, update/3, unpack/2, pack/1, to_ejson/1]).
+
+new() ->
+    #{}.
+
+%% Form a bookmark from the last contribution from each shard range
+update(DbName, PreviousBookmark, SearchResults) when is_binary(PreviousBookmark) ->
+    update(DbName, unpack(DbName, PreviousBookmark), SearchResults);
+update(DbName, {EJson}, SearchResults) when is_list(EJson) ->
+    update(DbName, from_ejson({EJson}), SearchResults);
+update(DbName, PreviousBookmark, SearchResults) when is_map(PreviousBookmark) ->
+    Hits = maps:get(<<"hits">>, SearchResults),
+    NewBookmark0 = lists:foldl(
+        fun(Hit, Acc) ->
+            maps:put(range_of(DbName, maps:get(<<"id">>, Hit)), maps:get(<<"order">>, Hit), Acc)

Review Comment:
   sure.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1518664205

   leaving branch alive as I squashed the history when I merged. some of the commits might be useful for understanding nascent project history.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129229264


##########
java/nouveau/base/src/main/java/org/apache/couchdb/nouveau/resources/BaseIndexResource.java:
##########
@@ -0,0 +1,96 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.resources;
+
+import java.io.IOException;
+
+import javax.validation.Valid;
+import javax.validation.constraints.NotNull;
+
+import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
+import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.apache.couchdb.nouveau.api.IndexInfo;
+import org.apache.couchdb.nouveau.api.SearchRequest;
+import org.apache.couchdb.nouveau.api.SearchResults;
+import org.apache.couchdb.nouveau.core.Index;
+import org.apache.couchdb.nouveau.core.IndexLoader;
+import org.apache.couchdb.nouveau.core.IndexManager;
+
+public abstract class BaseIndexResource<T> {
+
+    protected final IndexManager indexManager;
+
+    protected BaseIndexResource(final IndexManager indexManager) {
+        this.indexManager = indexManager;
+    }
+
+    @SuppressWarnings("unchecked")
+    public IndexInfo indexInfo(String name)
+            throws Exception {
+        final Index<T> index = indexManager.acquire(name, indexLoader());
+        try {
+            return index.info();
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    public void deletePath(String path) throws IOException {
+        indexManager.deleteAll(path);
+    }
+
+    public void createIndex(String name, @NotNull @Valid IndexDefinition indexDefinition)
+            throws IOException {
+        indexManager.create(name, indexDefinition);
+    }
+
+    @SuppressWarnings("unchecked")
+    public void deleteDoc(String name, String docId,
+            @NotNull @Valid final DocumentDeleteRequest request)
+            throws Exception {
+        final Index<T> index = indexManager.acquire(name, indexLoader());
+        try {
+            index.delete(docId, request);
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    @SuppressWarnings("unchecked")
+    public void updateDoc(String name, String docId,
+            @NotNull @Valid final DocumentUpdateRequest<T> request)
+            throws Exception {
+        final Index<T> index = indexManager.acquire(name, indexLoader());
+        try {
+            index.update(docId, request);
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    @SuppressWarnings("unchecked")
+    public SearchResults<T> searchIndex(String name, @NotNull @Valid SearchRequest request)
+            throws Exception {
+        final Index<T> index = indexManager.acquire(name, indexLoader());
+        try {
+            return index.search(request);
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    protected abstract IndexLoader<T> indexLoader();
+
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129224254


##########
java/nouveau/base/src/main/java/org/apache/couchdb/nouveau/api/After.java:
##########
@@ -0,0 +1,59 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import org.apache.couchdb.nouveau.core.ser.AfterDeserializer;
+import org.apache.couchdb.nouveau.core.ser.AfterSerializer;
+
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
+import io.dropwizard.jackson.JsonSnakeCase;
+
+@JsonSnakeCase
+@JsonSerialize(using = AfterSerializer.class)
+@JsonDeserialize(using = AfterDeserializer.class)
+public class After {
+
+    private Object[] fields;
+
+    public After() {
+    }
+
+    public After(final Object... fields) {
+        this.fields = fields;
+    }
+
+    public Object[] getFields() {
+        return fields;
+    }
+
+    public void setFields(Object[] fields) {
+        this.fields = fields;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder result = new StringBuilder();
+        result.append("After [fields=");

Review Comment:
   Missing closing-bracket "]"?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1486220073

   > * Nouveau endpoints return 404 if disabled and no attempt is made by the nouveau erlang app to contact the JVM side for background indexing.
   > * update=true|false is supported but stable=true|false is not. nouveau always sorts by _id as the tie-breaker so the instability that clouseau exhibits is much reduced.
   > * mango integration is done.
   > * indexes are deleted if nouveau is up when the database is deleted (same as dreyfus/clouseau)
   > * nouveau indexes are independent, same as dreyfus
   
   The idea is to have some basic coverage for those cases and others mentioned. If mango integration breaks, we'd want to find out, same for `update=false` etc. Indexes are independent but it's a good idea to actually test more than one at a time. We had a recent mishap with replication jobs unintentionally become singletons.
   
   > * I've not yet written a new _search_cleanup endpoint but the java side supports it (and is regularly exercised)
   > * include_docs=true not yet implemented (the README says so)
   
   I saw the README, but wasn't sure it was current with the latest update as it doesn't mention Lucene 4 at all.  No worries about cleanup and include_docs, it can implemented later.
   
   > * not sure what you mean specifically by "Collation (sort order) checks."
   
   Is there any support for altering result sort order. Going by README it mentioned both sorting and facets.
    For collation support, it would be checking if unicode data can be indexed and returned and if there is some normalization (NFD/NFC) or a way to set a locale. It's fine if there isn't, but users will at some point pass non-ASCII encoded data so it would be good to assert what happens in that case.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1341144819

   @big-r81 they worked in the original repo when they are at the root. good call, though. we either need to adjust them so they work here or remove them. haven't thought much about it.
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] janl commented on pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
janl commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1339202375

   @rnewson this is great, thanks a lot:
   
   for sake of easier consumption, it might be useful if you could pull out examples for what index definition and querying looks like in Search2 and Nouveau, so we can compare those easily (both JS and Mango).
   
   I think I will prefer the version field (absence means Search2) variant, but I’m not 100% if this maps cleanly to the same _search /_find URL endpoints.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] github-code-scanning[bot] commented on a diff in pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
github-code-scanning[bot] commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1082646943


##########
java/nouveau/server/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java:
##########
@@ -0,0 +1,268 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.core;
+
+import java.io.IOException;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.stream.Stream;
+
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotEmpty;
+import javax.validation.constraints.NotNull;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.apache.lucene.util.IOUtils;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.caffeine.MetricsStatsCounter;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.github.benmanes.caffeine.cache.CacheLoader;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.RemovalCause;
+import com.github.benmanes.caffeine.cache.RemovalListener;
+import com.github.benmanes.caffeine.cache.Scheduler;
+
+import io.dropwizard.lifecycle.Managed;
+
+public final class IndexManager implements Managed {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class);
+
+    private class IndexLoader implements CacheLoader<String, Index> {
+
+        @Override
+        public @Nullable Index load(@NonNull String name) throws Exception {
+            final Path path = indexPath(name);
+            final IndexDefinition indexDefinition = objectMapper.readValue(indexDefinitionPath(name).toFile(),
+                    IndexDefinition.class);
+            return indexFactory.open(path, indexDefinition);
+        }
+
+        @Override
+        public @Nullable Index reload(@NonNull String name, @NonNull Index index) throws Exception {
+            if (index.commit()) {
+                LOGGER.info("{} committed.", name);
+            }
+            return index;
+        }
+
+    }
+
+    private class IndexRemovalListener implements RemovalListener<String, Index> {
+
+        public void onRemoval(String name, Index index, RemovalCause cause) {
+            try {
+                if (index.isOpen()) {
+                    LOGGER.info("{} closing.", name);
+                    index.close();
+                    if (index.isDeleteOnClose()) {
+                        IOUtils.rm(indexRootPath(name));
+                    }
+                }
+            } catch (final IOException e) {
+                LOGGER.error(index + " threw exception when closing", e);
+            }
+        }
+    }
+
+    @Min(1)
+    private int maxIndexesOpen;
+
+    @Min(1)
+    private int commitIntervalSeconds;
+
+    @Min(1)
+    private int idleSeconds;
+
+    @NotEmpty
+    private Path rootDir;
+
+    @NotNull
+    private AnalyzerFactory analyzerFactory;
+
+    @NotNull
+    private ObjectMapper objectMapper;
+
+    private IndexFactory indexFactory;
+
+    private MetricRegistry metricRegistry;
+
+    private LoadingCache<String, Index> cache;
+
+    public Index acquire(final String name) throws IOException {
+        if (!exists(name)) {
+            throw new WebApplicationException("Index does not exist", Status.NOT_FOUND);
+        }
+        return cache.get(name);
+    }
+
+    public void invalidate(final String name) {
+        cache.invalidate(name);
+    }
+
+    public void create(final String name, IndexDefinition indexDefinition) throws IOException {
+        if (exists(name)) {
+            throw new WebApplicationException("Index already exists", Status.EXPECTATION_FAILED);
+        }
+        // Validate index definiton
+        analyzerFactory.fromDefinition(indexDefinition);
+
+        // Persist definition
+        final Path path = indexDefinitionPath(name);
+        if (Files.exists(path)) {
+            throw new FileAlreadyExistsException(name + " already exists");
+        }
+        Files.createDirectories(path.getParent());
+        objectMapper.writeValue(path.toFile(), indexDefinition);
+    }
+
+    public boolean exists(final String name) {
+        return Files.exists(indexDefinitionPath(name));
+    }
+
+    public void deleteAll(final String path) throws IOException {
+        final Path rootPath = indexRootPath(path);
+        if (!rootPath.toFile().exists()) {
+            return;
+        }
+        Stream<Path> stream = Files.find(rootPath, 100,
+                (p, attr) -> attr.isDirectory() && isIndex(p));
+        try {
+            stream.forEach((p) -> {
+                try {
+                    deleteIndex(rootDir.relativize(p).toString());
+                } catch (Exception e) {
+                    LOGGER.error("I/O exception deleting " + p, e);
+                }
+            });
+        } finally {
+            stream.close();
+        }
+    }
+
+    private void deleteIndex(final String name) throws IOException {
+        final Index index = cache.getIfPresent(name);
+        if (index == null) {
+            IOUtils.rm(indexRootPath(name));
+        } else {
+            index.setDeleteOnClose(true);
+            cache.invalidate(name);
+        }
+    }
+
+    @JsonProperty
+    public int getMaxIndexesOpen() {
+        return maxIndexesOpen;
+    }
+
+    public void setMaxIndexesOpen(int maxIndexesOpen) {
+        this.maxIndexesOpen = maxIndexesOpen;
+    }
+
+    @JsonProperty
+    public int getCommitIntervalSeconds() {
+        return commitIntervalSeconds;
+    }
+
+    public void setCommitIntervalSeconds(int commitIntervalSeconds) {
+        this.commitIntervalSeconds = commitIntervalSeconds;
+    }
+
+    @JsonProperty
+    public int getIdleSeconds() {
+        return idleSeconds;
+    }
+
+    public void setIdleSeconds(int idleSeconds) {
+        this.idleSeconds = idleSeconds;
+    }
+
+    @JsonProperty
+    public Path getRootDir() {
+        return rootDir;
+    }
+
+    public void setRootDir(Path rootDir) {
+        this.rootDir = rootDir;
+    }
+
+    public void setAnalyzerFactory(final AnalyzerFactory analyzerFactory) {
+        this.analyzerFactory = analyzerFactory;
+    }
+
+    public void setObjectMapper(final ObjectMapper objectMapper) {
+        this.objectMapper = objectMapper;
+    }
+
+    public void setIndexFactory(final IndexFactory indexFactory) {
+        this.indexFactory = indexFactory;
+    }
+
+    public void setMetricRegistry(final MetricRegistry metricRegistry) {
+        this.metricRegistry = metricRegistry;
+    }
+
+    @Override
+    public void start() throws IOException {
+        final IndexRemovalListener indexRemovalListener = new IndexRemovalListener();
+        cache = Caffeine.newBuilder()
+                .recordStats(() -> new MetricsStatsCounter(metricRegistry, "IndexManager"))
+                .initialCapacity(maxIndexesOpen)
+                .maximumSize(maxIndexesOpen)
+                .expireAfterAccess(Duration.ofSeconds(idleSeconds))
+                .expireAfterWrite(Duration.ofSeconds(idleSeconds))
+                .refreshAfterWrite(Duration.ofSeconds(commitIntervalSeconds))
+                .scheduler(Scheduler.systemScheduler())
+                .removalListener(indexRemovalListener)
+                .build(new IndexLoader());
+    }
+
+    @Override
+    public void stop() {
+        cache.invalidateAll();
+    }
+
+    private boolean isIndex(final Path path) {
+        return path.resolve("index_definition.json").toFile().exists();
+    }
+
+    private Path indexDefinitionPath(final String name) {
+        return indexRootPath(name).resolve("index_definition.json");
+    }
+
+    private Path indexPath(final String name) {
+        return indexRootPath(name).resolve("index");
+    }
+
+    private Path indexRootPath(final String name) {
+        final Path result = rootDir.resolve(name).normalize();

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   This path depends on a [user-provided value](2).
   This path depends on a [user-provided value](3).
   This path depends on a [user-provided value](4).
   This path depends on a [user-provided value](5).
   This path depends on a [user-provided value](6).
   
   [Show more details](https://github.com/apache/couchdb/security/code-scanning/2)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1117284663


##########
java/nouveau/server/TODO:
##########
@@ -0,0 +1,27 @@
+targeted dreyfus feature parity
+
+* pagination (bookmark I guess)
+* grouping
+* faceting
+* partitioned db support
+* ken integration
+* delete indexes on db deletion
+
+not targeted
+
+* stale=ok
+* highlighting
+* drilldown
+
+After reaching dreyfus parity, nouveau will diverge;
+
+* no javascript eval
+* ddoc will require an index schema, mapping couchdb document fields to lucene fields
+* spatial-extras and spatial3d support
+
+interim ideas
+
+* append type to field name, so `index("foo", 12.0")` becomes `new DoublePoint("foo<number>", 12.0)`
+* set a special Map to setPointsConfigMap() which examines that suffix and returns a PointsConfig for <number>
+
+* in nouveau branch of couchdb, remove dreyfus entirely and put nouveau at _search_analyze, _search, 

Review Comment:
   nit: missing \n @ eof



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] github-code-scanning[bot] commented on a diff in pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
github-code-scanning[bot] commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1042739161


##########
java/nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java:
##########
@@ -0,0 +1,470 @@
+// Copyright 2022 Robert Newson
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.core;
+
+import java.io.IOException;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.stream.Stream;
+
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotEmpty;
+import javax.validation.constraints.NotNull;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.Response.Status;
+
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.github.benmanes.caffeine.cache.CacheLoader;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import com.github.benmanes.caffeine.cache.RemovalCause;
+import com.github.benmanes.caffeine.cache.RemovalListener;
+import com.github.benmanes.caffeine.cache.Scheduler;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.misc.store.DirectIODirectory;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.IOUtils;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.dropwizard.lifecycle.Managed;
+
+public class IndexManager implements Managed {
+
+    private static final int RETRY_LIMIT = 500;
+    private static final int RETRY_SLEEP_MS = 5;
+    private static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class);
+
+    public class Index {
+        private static final String DEFAULT_FIELD = "default";
+        private final String name;
+        private IndexWriter writer;
+        private SearcherManager searcherManager;
+        private Analyzer analyzer;
+        private final AtomicBoolean deleteOnClose = new AtomicBoolean();
+        private final AtomicLong updateSeq = new AtomicLong();
+
+        // The write lock is to ensure there are no readers/searchers when
+        // we want to close the index.
+        private ReentrantReadWriteLock rwl = new ReentrantReadWriteLock();
+        private Lock rl = rwl.readLock();
+        private Lock wl = rwl.writeLock();
+
+        private Index(
+                      String name,
+                      IndexWriter writer,
+                      SearcherManager searcherManager,
+                      Analyzer analyzer,
+                      long updateSeq) {
+            this.name = name;
+            this.writer = writer;
+            this.searcherManager = searcherManager;
+            this.analyzer = analyzer;
+            this.updateSeq.set(updateSeq);
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public IndexWriter getWriter() {
+            return writer;
+        }
+
+        public SearcherManager getSearcherManager() {
+            return searcherManager;
+        }
+
+        public QueryParser getQueryParser() {
+            return new NouveauQueryParser(DEFAULT_FIELD, analyzer);
+        }
+
+        public boolean commit() throws IOException {
+            rl.lock();
+            try {
+                writer.setLiveCommitData(generateCommitData().entrySet());
+                return writer.commit() != -1;
+            } finally {
+                rl.unlock();
+            }
+        }
+
+        public long getUpdateSeq() throws IOException {
+            return updateSeq.get();
+        }
+
+        public void incrementUpdateSeq(final long updateSeq) throws IOException {
+            final long newSeq = this.updateSeq.accumulateAndGet(updateSeq, (a, b) -> Math.max(a, b));
+            if (newSeq != updateSeq) {
+                throw new UpdatesOutOfOrderException();
+            }
+        }
+
+        public void close() throws IOException {
+            wl.lock();
+            try {
+                if (writer == null) {
+                    // Already closed.
+                    return;
+                }
+
+                // Close searcher manager
+                if (searcherManager != null) {
+                    try {
+                        searcherManager.close();
+                    } catch (IOException e) {
+                        LOGGER.info(this + " threw exception when closing searcherManager.", e);
+                    } finally {
+                        searcherManager = null;
+                    }
+                }
+
+                if (deleteOnClose.get()) {
+                    try {
+                        // No need to commit in this case.
+                        writer.rollback();
+                    } catch (IOException e) {
+                        LOGGER.info(this + " threw exception when rolling back writer.", e);
+                    } finally {
+                        writer = null;
+                    }
+                    IOUtils.rm(indexRootPath(name));
+                } else {
+                    try {
+                        writer.setLiveCommitData(generateCommitData().entrySet());
+                        writer.close();
+                        LOGGER.info("{} closed.", this);
+                    } finally {
+                        writer = null;
+                    }
+                }
+            } finally {
+                wl.unlock();
+            }
+        }
+
+        private Map<String, String> generateCommitData() {
+            return Collections.singletonMap("update_seq", Long.toString(updateSeq.get()));
+        }
+
+        @Override
+        public String toString() {
+            return "Index [name=" + name + "]";
+        }
+    }
+
+    private class IndexLoader implements CacheLoader<String, Index> {
+
+        @Override
+        public @Nullable Index load(@NonNull String name) throws Exception {
+            return openExistingIndex(name);
+        }
+
+        @Override
+        public @Nullable Index reload(@NonNull String name, @NonNull Index index) throws Exception {
+            try {
+                if (index.commit()) {
+                    LOGGER.info("{} committed.", index);
+                }
+            } catch (final IOException e) {
+                LOGGER.error(index + " threw exception when committing.", e);
+                index.close();
+                return openExistingIndex(name);
+            }
+            return index;
+        }
+
+    }
+
+    private static class IndexCloser implements RemovalListener<String, Index> {
+
+        public void onRemoval(String name, Index index, RemovalCause cause) {
+            try {
+                index.close();
+            } catch (IOException e) {
+                LOGGER.error(index + " threw exception when closing", e);
+            }
+        }
+    }
+
+    private static final IndexCloser INDEX_CLOSER = new IndexCloser();
+
+
+    @Min(1)
+    private int maxIndexesOpen;
+
+    @Min(1)
+    private int commitIntervalSeconds;
+
+    @Min(1)
+    private int idleSeconds;
+
+    @NotEmpty
+    private Path rootDir;
+
+    @NotNull
+    private AnalyzerFactory analyzerFactory;
+
+    @NotNull
+    private ObjectMapper objectMapper;
+
+    private SearcherFactory searcherFactory;
+
+    private LoadingCache<String, Index> cache;
+
+    public Index acquire(final String name) throws IOException {
+        for (int i = 0; i < RETRY_LIMIT; i++) {
+            final Index result = getFromCache(name);
+
+            // Check if we're in the middle of closing.
+            result.rl.lock();
+            if (result.writer != null) {
+                return result;
+            }
+            result.rl.unlock();
+
+            // Retry after a short sleep.
+            try {
+                Thread.sleep(RETRY_SLEEP_MS);
+            } catch (InterruptedException e) {
+                Thread.interrupted();
+                break;
+            }
+        }
+        throw new IOException("Failed to acquire " + name);
+    }
+
+    public void release(final Index index) throws IOException {
+        index.rl.unlock();
+    }
+
+    public void create(final String name, IndexDefinition indexDefinition) throws IOException {
+        createNewIndex(name, indexDefinition);
+    }
+
+    public void deleteAll(final String path) throws IOException {
+        final Path rootPath = indexRootPath(path);
+        if (!rootPath.toFile().exists()) {
+            return;
+        }
+        Stream<Path> stream = Files.find(rootPath, 100,
+            (p, attr) -> attr.isDirectory() && isIndex(p));
+        try {
+            stream.forEach((p) -> {
+                try {
+                    deleteIndex(rootDir.relativize(p).toString());
+                } catch (Exception e) {
+                    LOGGER.error("I/O exception deleting " + p, e);
+                }
+            });
+        } finally {
+            stream.close();
+        }
+    }
+
+    private void deleteIndex(final String name) throws IOException {
+        final Index index = acquire(name);
+        try {
+            index.deleteOnClose.set(true);
+            cache.invalidate(name);
+        } finally {
+            release(index);
+        }
+    }
+
+    @JsonProperty
+    public int getMaxIndexesOpen() {
+        return maxIndexesOpen;
+    }
+
+    public void setMaxIndexesOpen(int maxIndexesOpen) {
+        this.maxIndexesOpen = maxIndexesOpen;
+    }
+
+    @JsonProperty
+    public int getCommitIntervalSeconds() {
+        return commitIntervalSeconds;
+    }
+
+    public void setCommitIntervalSeconds(int commitIntervalSeconds) {
+        this.commitIntervalSeconds = commitIntervalSeconds;
+    }
+
+    @JsonProperty
+    public int getIdleSeconds() {
+        return idleSeconds;
+    }
+
+    public void setIdleSeconds(int idleSeconds) {
+        this.idleSeconds = idleSeconds;
+    }
+
+    @JsonProperty
+    public Path getRootDir() {
+        return rootDir;
+    }
+
+    public void setRootDir(Path rootDir) {
+        this.rootDir = rootDir;
+    }
+
+    public void setAnalyzerFactory(final AnalyzerFactory analyzerFactory) {
+        this.analyzerFactory = analyzerFactory;
+    }
+
+    public void setObjectMapper(final ObjectMapper objectMapper) {
+        this.objectMapper = objectMapper;
+    }
+
+    public void setSearcherFactory(final SearcherFactory searcherFactory) {
+        this.searcherFactory = searcherFactory;
+    }
+
+    @Override
+    public void start() throws IOException {
+        cache = Caffeine.newBuilder()
+            .initialCapacity(maxIndexesOpen)
+            .maximumSize(maxIndexesOpen)
+            .expireAfterAccess(Duration.ofSeconds(idleSeconds))
+            .expireAfterWrite(Duration.ofSeconds(idleSeconds))
+            .refreshAfterWrite(Duration.ofSeconds(commitIntervalSeconds))
+            .scheduler(Scheduler.systemScheduler())
+            .removalListener(INDEX_CLOSER)
+            .evictionListener(INDEX_CLOSER)
+            .build(new IndexLoader());
+    }
+
+    @Override
+    public void stop() {
+        cache.invalidateAll();
+    }
+
+    private Index getFromCache(final String name) throws IOException {
+        try {
+            return cache.get(name);
+        } catch (CompletionException e) {
+            if (e.getCause() instanceof IOException) {
+                throw (IOException) e.getCause();
+            }
+            throw e;
+        }
+    }
+
+    private void createNewIndex(final String name, final IndexDefinition indexDefinition) throws IOException {
+        // Validate index definiton
+        analyzerFactory.fromDefinition(indexDefinition);
+
+        // Persist definition
+        final Path path = indexDefinitionPath(name);
+        if (Files.exists(path)) {
+            throw new FileAlreadyExistsException(name + " already exists");
+        }
+        Files.createDirectories(path.getParent());
+        objectMapper.writeValue(path.toFile(), indexDefinition);
+    }
+
+    private Index openExistingIndex(final String name) throws IOException {
+        final IndexDefinition indexDefinition = objectMapper.readValue(indexDefinitionPath(name).toFile(), IndexDefinition.class);
+        final Analyzer analyzer =  analyzerFactory.fromDefinition(indexDefinition);
+        final Path path = indexPath(name);
+        final Directory dir = directory(path);
+        final IndexWriter writer = newWriter(dir, analyzer);
+        final SearcherManager searcherManager = new SearcherManager(writer, searcherFactory);
+        final long updateSeq = getUpdateSeq(writer);
+        return new Index(name, writer, searcherManager, analyzer, updateSeq);
+    }
+
+    private long getUpdateSeq(final IndexWriter writer) throws IOException {
+        final Iterable<Map.Entry<String, String>> commitData = writer.getLiveCommitData();
+        if (commitData == null) {
+            return 0L;
+        }
+        for (Map.Entry<String, String> entry : commitData) {
+            if (entry.getKey().equals("update_seq")) {
+                return Long.parseLong(entry.getValue());
+            }
+        }
+        return 0L;
+    }
+
+    private IndexWriter newWriter(final Directory dir, final Analyzer analyzer) throws IOException {
+        LockObtainFailedException exceptionThrown = null;
+        for (int i = 0; i < RETRY_LIMIT; i++) {
+            try {
+                final IndexWriterConfig config = new IndexWriterConfig(analyzer);
+                config.setCommitOnClose(true);
+                config.setUseCompoundFile(false);
+                return new IndexWriter(dir, config);
+            } catch (LockObtainFailedException e) {
+                exceptionThrown = e;
+                try {
+                    Thread.sleep(RETRY_SLEEP_MS);
+                } catch (InterruptedException e1) {
+                    Thread.interrupted();
+                    break;
+                }
+            }
+        }
+        throw exceptionThrown;
+    }
+
+    private boolean isIndex(final Path path) {
+        return path.resolve("index_definition.json").toFile().exists();
+    }
+
+    private Path indexDefinitionPath(final String name) {
+        return indexRootPath(name).resolve("index_definition.json");
+    }
+
+    private Path indexPath(final String name) {
+        return indexRootPath(name).resolve("index");
+    }
+
+    private Path indexRootPath(final String name) {
+        final Path result = rootDir.resolve(name).normalize();

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   This path depends on a [user-provided value](2).
   
   [Show more details](https://github.com/apache/couchdb/security/code-scanning/1)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] janl commented on a diff in pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
janl commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1040865522


##########
java/nouveau/pom.xml:
##########
@@ -0,0 +1,216 @@
+<!--
+    Copyright 2022 Cloudant. All rights reserved.

Review Comment:
   stray copyright notice



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170804075


##########
nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java:
##########
@@ -0,0 +1,323 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.core;
+
+import static com.codahale.metrics.MetricRegistry.name;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.eclipse.jetty.io.RuntimeIOException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.caffeine.MetricsStatsCounter;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.RemovalCause;
+import com.github.benmanes.caffeine.cache.RemovalListener;
+import com.github.benmanes.caffeine.cache.Scheduler;
+
+import io.dropwizard.lifecycle.Managed;
+import jakarta.ws.rs.WebApplicationException;
+import jakarta.ws.rs.core.Response.Status;
+
+/**
+ * The central class of Nouveau, responsible for loading and unloading Lucene
+ * indexes and making them available for query.
+ */
+
+public final class IndexManager implements Managed {
+
+    @FunctionalInterface
+    public interface IndexFunction<V, R> {
+        R apply(final V value) throws IOException;
+    }
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class);
+
+    private int maxIndexesOpen;
+
+    private int commitIntervalSeconds;
+
+    private int idleSeconds;
+
+    private Path rootDir;
+
+    private ObjectMapper objectMapper;
+
+    private MetricRegistry metricRegistry;
+
+    private ScheduledExecutorService scheduler;
+
+    private Cache<String, Index> cache;
+
+    public <R> R with(final String name, final IndexLoader loader, final IndexFunction<Index, R> indexFun)
+            throws IOException, InterruptedException {
+        while (true) {
+            if (!exists(name)) {
+                throw new WebApplicationException("Index does not exist", Status.NOT_FOUND);
+            }
+
+            final Index index;
+            try {
+                index = cache.get(name, (n) -> {
+                    LOGGER.info("opening {}", n);
+                    final Path path = indexPath(n);
+                    try {
+                        final IndexDefinition indexDefinition = loadIndexDefinition(n);
+                        return loader.apply(path, indexDefinition);
+                    } catch (final IOException e) {
+                        throw new RuntimeIOException(e);
+                    }
+                });
+            } catch (final RuntimeIOException e) {
+                throw (IOException) e.getCause();
+            }
+
+            if (index.tryAcquire(1, TimeUnit.SECONDS)) {
+                try {
+                    final R result = indexFun.apply(index);
+                    if (index.needsCommit(commitIntervalSeconds, TimeUnit.SECONDS)) {
+                        scheduler.execute(() -> {
+                            if (index.tryAcquire()) {
+                                try {
+                                    LOGGER.info("committing {}", name);

Review Comment:
   Is there a `debug` level. Otherwise, would we expect to see a log entry for every index commit?
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170864277


##########
src/nouveau/src/nouveau_httpd_handlers.erl:
##########
@@ -0,0 +1,35 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd_handlers).
+
+-export([url_handler/1, db_handler/1, design_handler/1]).
+
+url_handler(<<"_nouveau_analyze">>) ->
+    fun nouveau_httpd:handle_analyze_req/1;
+url_handler(_) ->
+    no_match.
+
+db_handler(<<"_nouveau_cleanup">>) ->
+    fun nouveau_httpd:handle_cleanup_req/2;
+db_handler(_) ->
+    no_match.
+
+design_handler(<<"_nouveau">>) ->
+    fun nouveau_httpd:handle_search_req/3;
+design_handler(<<"_nouveau_info">>) ->
+    fun nouveau_httpd:handle_info_req/3;

Review Comment:
   Would it be possible to have a basic integration test calling this API?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170784316


##########
nouveau/README.md:
##########
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on;
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json.yaml}
+

Review Comment:
   Would basic geo queries work as well as described in https://blog.cloudant.com/2022/06/28/Simple-Geospatial-Queries.html?
   
   EDIT: I see it mentioned in TODO as slated for the future.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170780372


##########
nouveau/README.md:
##########
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on;
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json.yaml}
+
+## What doesn't work yet?
+
+* results grouping
+* configurable stop words for analyzers
+* Makefile.win or Windows generally
+
+I don't intend to add grouping support, it seems barely used. Would accept a tidy contribution, though.
+
+## Why is this better than dreyfus/clouseau?
+
+* No scalang (or Scala!)
+* Supports any version of Java that Lucene 9 supports
+* memory-mapped I/O for performance (which works best on Java 19)
+* direct I/O used for segment merging (so we don't evict useful data from disk cache)
+
+## Getting started
+
+Configure CouchDB with `--enable-nouveau'
+
+Build Nouveau with;
+
+`make`
+
+Run Nouvea with;

Review Comment:
   `Nouvea` -> Nouveau`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173544103


##########
nouveau/src/main/java/org/apache/couchdb/nouveau/core/IndexManager.java:
##########
@@ -0,0 +1,323 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.core;
+
+import static com.codahale.metrics.MetricRegistry.name;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.eclipse.jetty.io.RuntimeIOException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.caffeine.MetricsStatsCounter;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.RemovalCause;
+import com.github.benmanes.caffeine.cache.RemovalListener;
+import com.github.benmanes.caffeine.cache.Scheduler;
+
+import io.dropwizard.lifecycle.Managed;
+import jakarta.ws.rs.WebApplicationException;
+import jakarta.ws.rs.core.Response.Status;
+
+/**
+ * The central class of Nouveau, responsible for loading and unloading Lucene
+ * indexes and making them available for query.
+ */
+
+public final class IndexManager implements Managed {
+
+    @FunctionalInterface
+    public interface IndexFunction<V, R> {
+        R apply(final V value) throws IOException;
+    }
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class);
+
+    private int maxIndexesOpen;
+
+    private int commitIntervalSeconds;
+
+    private int idleSeconds;
+
+    private Path rootDir;
+
+    private ObjectMapper objectMapper;
+
+    private MetricRegistry metricRegistry;
+
+    private ScheduledExecutorService scheduler;
+
+    private Cache<String, Index> cache;
+
+    public <R> R with(final String name, final IndexLoader loader, final IndexFunction<Index, R> indexFun)
+            throws IOException, InterruptedException {
+        while (true) {
+            if (!exists(name)) {
+                throw new WebApplicationException("Index does not exist", Status.NOT_FOUND);
+            }
+
+            final Index index;
+            try {
+                index = cache.get(name, (n) -> {
+                    LOGGER.info("opening {}", n);
+                    final Path path = indexPath(n);
+                    try {
+                        final IndexDefinition indexDefinition = loadIndexDefinition(n);
+                        return loader.apply(path, indexDefinition);
+                    } catch (final IOException e) {
+                        throw new RuntimeIOException(e);
+                    }
+                });
+            } catch (final RuntimeIOException e) {
+                throw (IOException) e.getCause();
+            }
+
+            if (index.tryAcquire(1, TimeUnit.SECONDS)) {
+                try {
+                    final R result = indexFun.apply(index);
+                    if (index.needsCommit(commitIntervalSeconds, TimeUnit.SECONDS)) {
+                        scheduler.execute(() -> {
+                            if (index.tryAcquire()) {
+                                try {
+                                    LOGGER.info("committing {}", name);

Review Comment:
   there is but I found it valuable to confirm that indexes are committed. reducing to debug is fine now though.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173543580


##########
nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java:
##########
@@ -0,0 +1,145 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class Range<T> {
+
+    @NotEmpty
+    private String label;
+
+    @NotNull
+    private T min;
+
+    private boolean minInclusive = true;
+
+    @NotNull
+    private T max;
+
+    private boolean maxInclusive = true;
+
+    public Range() {
+    }
+
+    public Range(String label, T min, boolean minInclusive, T max, boolean maxInclusive) {
+        this.label = label;
+        this.min = min;
+        this.minInclusive = minInclusive;
+        this.max = max;
+        this.maxInclusive = maxInclusive;
+    }
+
+    @JsonProperty
+    public String getLabel() {
+        return label;
+    }
+
+    public void setLabel(String label) {
+        this.label = label;
+    }
+
+    @JsonProperty
+    public T getMin() {
+        return min;
+    }
+
+    public void setMin(T min) {
+        this.min = min;
+    }
+
+    @JsonProperty("min_inclusive")
+    public boolean isMinInclusive() {
+        return minInclusive;
+    }
+
+    public void setMinInclusive(boolean minInclusive) {
+        this.minInclusive = minInclusive;
+    }
+
+    @JsonProperty
+    public T getMax() {
+        return max;
+    }
+
+    public void setMax(T max) {
+        this.max = max;
+    }
+
+    @JsonProperty("max_inclusive")
+    public boolean isMaxInclusive() {
+        return maxInclusive;
+    }
+
+    public void setMaxInclusive(boolean maxInclusive) {
+        this.maxInclusive = maxInclusive;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((label == null) ? 0 : label.hashCode());
+        result = prime * result + ((min == null) ? 0 : min.hashCode());
+        result = prime * result + (minInclusive ? 1231 : 1237);

Review Comment:
   auto-generated by an IDE (vscode).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1517681072

   the 'expectation_failed' I think is because some tests run so quickly that the new dbname for the next test is the same. I suspect its a timestamp?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129227966


##########
java/nouveau/base/src/main/java/org/apache/couchdb/nouveau/core/Index.java:
##########
@@ -0,0 +1,145 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.core;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
+import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
+import org.apache.couchdb.nouveau.api.IndexInfo;
+import org.apache.couchdb.nouveau.api.SearchRequest;
+import org.apache.couchdb.nouveau.api.SearchResults;
+
+public abstract class Index<T> implements Closeable {
+
+    private long updateSeq;
+
+    private boolean deleteOnClose = false;
+
+    private int refCount = 1;
+
+    private long lastUsed = now();
+
+    protected Index(final long updateSeq) {
+        this.updateSeq = updateSeq;
+    }
+
+    public final IndexInfo info() throws IOException {
+        updateLastUsed();
+        final int numDocs = doNumDocs();
+        return new IndexInfo(updateSeq, numDocs);
+    }
+
+    protected abstract int doNumDocs() throws IOException;
+
+    public final synchronized void update(final String docId, final DocumentUpdateRequest<T> request) throws IOException {
+        assertUpdateSeqIsLower(request.getSeq());
+        updateLastUsed();
+        doUpdate(docId, request);
+        incrementUpdateSeq(request.getSeq());
+    }
+
+    protected abstract void doUpdate(final String docId, final DocumentUpdateRequest<T> request) throws IOException;
+
+    public final synchronized void delete(final String docId, final DocumentDeleteRequest request) throws IOException {
+        assertUpdateSeqIsLower(request.getSeq());
+        updateLastUsed();
+        doDelete(docId, request);
+        incrementUpdateSeq(request.getSeq());
+    }
+
+    protected abstract void doDelete(final String docId, final DocumentDeleteRequest request) throws IOException;
+
+    public final SearchResults<T> search(final SearchRequest request) throws IOException {
+        updateLastUsed();
+        return doSearch(request);
+    }
+
+    protected abstract SearchResults<T> doSearch(final SearchRequest request) throws IOException;
+
+    public final boolean commit() throws IOException {
+        final long updateSeq;
+        synchronized (this) {
+            updateSeq = this.updateSeq;
+        }
+        return doCommit(updateSeq);
+    }
+
+    protected abstract boolean doCommit(final long updateSeq) throws IOException;
+
+    @Override
+    public final void close() throws IOException {
+        decRef();
+    }
+
+    protected abstract void doClose() throws IOException;
+
+    public abstract boolean isOpen();
+
+    public boolean isDeleteOnClose() {
+        return deleteOnClose;
+    }
+
+    public void setDeleteOnClose(final boolean deleteOnClose) {
+        this.deleteOnClose = deleteOnClose;
+    }
+
+    protected final void assertUpdateSeqIsLower(final long updateSeq) throws UpdatesOutOfOrderException {
+        if (!(updateSeq > this.updateSeq)) {
+            throw new UpdatesOutOfOrderException();
+        }
+    }
+
+    protected final void incrementUpdateSeq(final long updateSeq) throws IOException {
+        assertUpdateSeqIsLower(updateSeq);
+        this.updateSeq = updateSeq;
+    }
+
+    private synchronized void updateLastUsed() {
+        this.lastUsed = now();
+    }
+
+    public final synchronized void incRef() {
+        // zero is forever.
+        if (refCount > 0) {
+            refCount++;
+        }
+    }
+
+    public final synchronized int getRefCount() {
+        return refCount;
+    }
+
+    public final boolean decRef() throws IOException {
+        boolean close;
+        synchronized (this) {
+            close = --refCount == 0;
+        }
+        if (close) {
+            doClose();
+        }
+        return close;
+    }
+
+    public synchronized long secondsSinceLastUse() {
+        return TimeUnit.NANOSECONDS.toMillis(now() - lastUsed);
+    }
+
+    private long now() {
+        return System.nanoTime();
+    }
+
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1489996648

   on sort order, yes, you can sort on one or more fields (string or number) in ascending or descending order, the same as dreyfus. Unlike dreyfus, nouveau uses couch_ejson_compare when comparing UTF-8 strings.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173541158


##########
nouveau/README.md:
##########
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on;
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json.yaml}
+

Review Comment:
   definitely a (near) future thing. once we're happy with the basics we can extend functionality.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173543197


##########
nouveau/TODO:
##########
@@ -0,0 +1,27 @@
+targeted dreyfus feature parity
+
+* pagination (bookmark I guess)
+* grouping
+* faceting
+* partitioned db support
+* ken integration

Review Comment:
   yup, out of date.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173622821


##########
src/nouveau/src/nouveau_httpd_handlers.erl:
##########
@@ -0,0 +1,35 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd_handlers).
+
+-export([url_handler/1, db_handler/1, design_handler/1]).
+
+url_handler(<<"_nouveau_analyze">>) ->
+    fun nouveau_httpd:handle_analyze_req/1;
+url_handler(_) ->
+    no_match.
+
+db_handler(<<"_nouveau_cleanup">>) ->
+    fun nouveau_httpd:handle_cleanup_req/2;
+db_handler(_) ->
+    no_match.
+
+design_handler(<<"_nouveau">>) ->
+    fun nouveau_httpd:handle_search_req/3;
+design_handler(<<"_nouveau_info">>) ->
+    fun nouveau_httpd:handle_info_req/3;

Review Comment:
   sure



##########
src/nouveau/src/nouveau_httpd_handlers.erl:
##########
@@ -0,0 +1,35 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd_handlers).
+
+-export([url_handler/1, db_handler/1, design_handler/1]).
+
+url_handler(<<"_nouveau_analyze">>) ->

Review Comment:
   sure



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170831150


##########
src/mango/src/mango_idx_nouveau.erl:
##########
@@ -0,0 +1,459 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mango_idx_nouveau).
+
+-export([
+    validate_new/2,
+    validate_fields/1,
+    validate_index_def/1,
+    add/2,
+    remove/2,
+    from_ddoc/1,
+    to_json/1,
+    columns/1,
+    is_usable/3,
+    get_default_field_options/1
+]).
+
+-include_lib("couch/include/couch_db.hrl").
+-include("mango.hrl").
+-include("mango_idx.hrl").
+
+validate_new(#idx{} = Idx, Db) ->
+    {ok, Def} = do_validate(Idx#idx.def),
+    maybe_reject_index_all_req(Def, Db),
+    {ok, Idx#idx{def = Def}}.
+
+validate_index_def(IndexInfo) ->
+    do_validate(IndexInfo).
+
+add(#doc{body = {Props0}} = DDoc, Idx) ->
+    Texts1 =
+        case proplists:get_value(<<"nouveau">>, Props0) of
+            {Texts0} -> Texts0;
+            _ -> []
+        end,
+    NewText = make_text(Idx),
+    Texts2 = lists:keystore(element(1, NewText), 1, Texts1, NewText),
+    Props1 = lists:keystore(<<"nouveau">>, 1, Props0, {<<"nouveau">>, {Texts2}}),
+    {ok, DDoc#doc{body = {Props1}}}.
+
+remove(#doc{body = {Props0}} = DDoc, Idx) ->
+    Texts1 =
+        case proplists:get_value(<<"nouveau">>, Props0) of
+            {Texts0} ->
+                Texts0;
+            _ ->
+                ?MANGO_ERROR({index_not_found, Idx#idx.name})
+        end,
+    Texts2 = lists:keydelete(Idx#idx.name, 1, Texts1),
+    if
+        Texts2 /= Texts1 -> ok;
+        true -> ?MANGO_ERROR({index_not_found, Idx#idx.name})
+    end,
+    Props1 =
+        case Texts2 of
+            [] ->
+                lists:keydelete(<<"nouveau">>, 1, Props0);
+            _ ->
+                lists:keystore(<<"nouveau">>, 1, Props0, {<<"nouveau">>, {Texts2}})
+        end,
+    {ok, DDoc#doc{body = {Props1}}}.
+
+from_ddoc({Props}) ->
+    case lists:keyfind(<<"nouveau">>, 1, Props) of
+        {<<"nouveau">>, {Texts}} when is_list(Texts) ->
+            lists:flatmap(
+                fun({Name, {VProps}}) ->
+                    case validate_ddoc(VProps) of
+                        invalid_ddoc ->
+                            [];
+                        Def ->
+                            I = #idx{
+                                type = <<"nouveau">>,
+                                name = Name,
+                                def = Def
+                            },
+                            [I]
+                    end
+                end,
+                Texts
+            );
+        _ ->
+            []
+    end.
+
+to_json(Idx) ->
+    {[
+        {ddoc, Idx#idx.ddoc},
+        {name, Idx#idx.name},
+        {type, Idx#idx.type},
+        {partitioned, Idx#idx.partitioned},
+        {def, {def_to_json(Idx#idx.def)}}
+    ]}.
+
+columns(Idx) ->
+    {Props} = Idx#idx.def,
+    {<<"fields">>, Fields} = lists:keyfind(<<"fields">>, 1, Props),
+    case Fields of
+        <<"all_fields">> ->
+            all_fields;
+        _ ->
+            {DFProps} = couch_util:get_value(<<"default_field">>, Props, {[]}),
+            Enabled = couch_util:get_value(<<"enabled">>, DFProps, true),
+            Default =
+                case Enabled of
+                    true -> [<<"$default">>];
+                    false -> []
+                end,
+            Default ++
+                lists:map(
+                    fun({FProps}) ->
+                        {_, Name} = lists:keyfind(<<"name">>, 1, FProps),
+                        {_, Type} = lists:keyfind(<<"type">>, 1, FProps),
+                        iolist_to_binary([Name, ":", Type])
+                    end,
+                    Fields
+                )
+    end.
+
+is_usable(_, Selector, _) when Selector =:= {[]} ->
+    false;
+is_usable(Idx, Selector, _) ->
+    case columns(Idx) of
+        all_fields ->
+            true;
+        Cols ->
+            Fields = indexable_fields(Selector),
+            sets:is_subset(sets:from_list(Fields), sets:from_list(Cols))
+    end.
+
+do_validate({Props}) ->
+    {ok, Opts} = mango_opts:validate(Props, opts()),
+    {ok, {Opts}};
+do_validate(Else) ->
+    ?MANGO_ERROR({invalid_index_text, Else}).
+
+def_to_json({Props}) ->
+    def_to_json(Props);
+def_to_json([]) ->
+    [];
+def_to_json([{<<"fields">>, <<"all_fields">>} | Rest]) ->
+    [{<<"fields">>, []} | def_to_json(Rest)];
+def_to_json([{fields, Fields} | Rest]) ->
+    [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)];
+def_to_json([{<<"fields">>, Fields} | Rest]) ->
+    [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)];
+% Don't include partial_filter_selector in the json conversion
+% if its the default value
+def_to_json([{<<"partial_filter_selector">>, {[]}} | Rest]) ->
+    def_to_json(Rest);
+def_to_json([{Key, Value} | Rest]) ->
+    [{Key, Value} | def_to_json(Rest)].
+
+fields_to_json([]) ->
+    [];
+fields_to_json([{[{<<"name">>, Name}, {<<"type">>, Type0}]} | Rest]) ->
+    ok = validate_field_name(Name),
+    Type = validate_field_type(Type0),
+    [{[{Name, Type}]} | fields_to_json(Rest)];
+fields_to_json([{[{<<"type">>, Type0}, {<<"name">>, Name}]} | Rest]) ->
+    ok = validate_field_name(Name),
+    Type = validate_field_type(Type0),
+    [{[{Name, Type}]} | fields_to_json(Rest)].
+
+%% In the future, we can possibly add more restrictive validation.
+%% For now, let's make sure the field name is not blank.
+validate_field_name(<<"">>) ->
+    throw(invalid_field_name);
+validate_field_name(Else) when is_binary(Else) ->
+    ok;
+validate_field_name(_) ->
+    throw(invalid_field_name).
+
+validate_field_type(<<"string">>) ->
+    <<"string">>;
+validate_field_type(<<"number">>) ->
+    <<"number">>;
+validate_field_type(<<"boolean">>) ->
+    <<"boolean">>.
+
+validate_fields(<<"all_fields">>) ->
+    {ok, all_fields};
+validate_fields(Fields) ->
+    try fields_to_json(Fields) of
+        _ ->
+            mango_fields:new(Fields)
+    catch
+        error:function_clause ->
+            ?MANGO_ERROR({invalid_index_fields_definition, Fields});
+        throw:invalid_field_name ->
+            ?MANGO_ERROR({invalid_index_fields_definition, Fields})
+    end.
+
+validate_ddoc(VProps) ->
+    try
+        Def = proplists:get_value(<<"index">>, VProps),
+        validate_index_def(Def),
+        Def
+    catch
+        Error:Reason ->
+            couch_log:error(
+                "Invalid Index Def ~p: Error. ~p, Reason: ~p",
+                [VProps, Error, Reason]
+            ),
+            invalid_ddoc
+    end.
+
+opts() ->
+    [
+        {<<"default_analyzer">>, [
+            {tag, default_analyzer},
+            {optional, true},
+            {default, <<"keyword">>}
+        ]},
+        {<<"default_field">>, [
+            {tag, default_field},
+            {optional, true},
+            {default, {[]}}
+        ]},
+        {<<"partial_filter_selector">>, [
+            {tag, partial_filter_selector},
+            {optional, true},
+            {default, {[]}},
+            {validator, fun mango_opts:validate_selector/1}
+        ]},
+        {<<"selector">>, [
+            {tag, selector},
+            {optional, true},
+            {default, {[]}},
+            {validator, fun mango_opts:validate_selector/1}
+        ]},
+        {<<"fields">>, [
+            {tag, fields},
+            {optional, true},
+            {default, []},
+            {validator, fun ?MODULE:validate_fields/1}
+        ]},
+        {<<"index_array_lengths">>, [
+            {tag, index_array_lengths},
+            {optional, true},
+            {default, true},
+            {validator, fun mango_opts:is_boolean/1}
+        ]}
+    ].
+
+make_text(Idx) ->
+    Text =
+        {[
+            {<<"index">>, Idx#idx.def},
+            {<<"analyzer">>, construct_analyzer(Idx#idx.def)}
+        ]},
+    {Idx#idx.name, Text}.
+
+get_default_field_options(Props) ->
+    Default = couch_util:get_value(default_field, Props, {[]}),
+    case Default of
+        Bool when is_boolean(Bool) ->
+            {Bool, <<"standard">>};
+        {[]} ->
+            {true, <<"standard">>};
+        {Opts} ->
+            Enabled = couch_util:get_value(<<"enabled">>, Opts, true),
+            Analyzer = couch_util:get_value(
+                <<"analyzer">>,
+                Opts,
+                <<"standard">>
+            ),
+            {Enabled, Analyzer}
+    end.
+
+construct_analyzer({Props}) ->
+    DefaultAnalyzer = couch_util:get_value(
+        default_analyzer,
+        Props,
+        <<"keyword">>
+    ),
+    {DefaultField, DefaultFieldAnalyzer} = get_default_field_options(Props),
+    DefaultAnalyzerDef =
+        case DefaultField of
+            true ->
+                [{<<"$default">>, DefaultFieldAnalyzer}];
+            _ ->
+                []
+        end,
+    case DefaultAnalyzerDef of
+        [] ->
+            <<"keyword">>;
+        _ ->
+            {[
+                {<<"name">>, <<"perfield">>},
+                {<<"default">>, DefaultAnalyzer},
+                {<<"fields">>, {DefaultAnalyzerDef}}
+            ]}
+    end.
+
+indexable_fields(Selector) ->
+    TupleTree = mango_selector_text:convert([], Selector),
+    indexable_fields([], TupleTree).
+
+indexable_fields(Fields, {op_and, Args}) when is_list(Args) ->
+    lists:foldl(
+        fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
+        Fields,
+        Args
+    );
+%% For queries that use array element access or $in operations, two
+%% fields get generated by mango_selector_text:convert. At index
+%% definition time, only one field gets defined. In this situation, we
+%% remove the extra generated field so that the index can be used. For
+%% all other situations, we include the fields as normal.
+indexable_fields(
+    Fields,
+    {op_or, [
+        {op_field, Field0},
+        {op_field, {[Name | _], _}} = Field1
+    ]}
+) ->
+    case lists:member(<<"[]">>, Name) of
+        true ->
+            indexable_fields(Fields, {op_field, Field0});
+        false ->
+            Fields1 = indexable_fields(Fields, {op_field, Field0}),
+            indexable_fields(Fields1, Field1)
+    end;
+indexable_fields(Fields, {op_or, Args}) when is_list(Args) ->
+    lists:foldl(
+        fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
+        Fields,
+        Args
+    );
+indexable_fields(Fields, {op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
+    Fields0 = indexable_fields(Fields, ExistsQuery),
+    indexable_fields(Fields0, Arg);
+% forces "$exists" : false to use _all_docs
+indexable_fields(_, {op_not, {_, false}}) ->
+    [];
+indexable_fields(Fields, {op_insert, Arg}) when is_binary(Arg) ->
+    Fields;
+%% fieldname.[]:length is not a user defined field.
+indexable_fields(Fields, {op_field, {[_, <<":length">>], _}}) ->
+    Fields;
+indexable_fields(Fields, {op_field, {Name, _}}) ->
+    [iolist_to_binary(Name) | Fields];
+%% In this particular case, the lucene index is doing a field_exists query
+%% so it is looking at all sorts of combinations of field:* and field.*
+%% We don't add the field because we cannot pre-determine what field will exist.
+%% Hence we just return Fields and make it less restrictive.
+indexable_fields(Fields, {op_fieldname, {_, _}}) ->
+    Fields;
+%% Similar idea to op_fieldname but with fieldname:null
+indexable_fields(Fields, {op_null, {_, _}}) ->
+    Fields;
+indexable_fields(Fields, {op_default, _}) ->
+    [<<"$default">> | Fields].
+
+maybe_reject_index_all_req({Def}, Db) ->
+    DbName = couch_db:name(Db),
+    #user_ctx{name = User} = couch_db:get_user_ctx(Db),
+    Fields = couch_util:get_value(fields, Def),
+    case {Fields, forbid_index_all()} of
+        {all_fields, "true"} ->
+            ?MANGO_ERROR(index_all_disabled);
+        {all_fields, "warn"} ->
+            couch_log:warning(
+                "User ~p is indexing all fields in db ~p",
+                [User, DbName]
+            );
+        _ ->
+            ok
+    end.
+
+forbid_index_all() ->
+    config:get("mango", "index_all_disabled", "false").
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+setup_all() ->
+    Ctx = test_util:start_couch(),
+    meck:expect(
+        couch_log,
+        warning,
+        2,
+        fun(_, _) ->
+            throw({test_error, logged_warning})
+        end
+    ),
+    Ctx.
+
+teardown_all(Ctx) ->
+    meck:unload(),
+    test_util:stop_couch(Ctx).
+
+setup() ->
+    %default index all def that generates {fields, all_fields}
+    Index = #idx{def = {[]}},
+    DbName = <<"testdb">>,
+    UserCtx = #user_ctx{name = <<"u1">>},
+    {ok, Db} = couch_db:clustered_db(DbName, UserCtx),
+    {Index, Db}.
+
+teardown(_) ->
+    ok.
+
+index_all_test_() ->
+    {
+        setup,
+        fun setup_all/0,
+        fun teardown_all/1,
+        {
+            foreach,
+            fun setup/0,
+            fun teardown/1,
+            [
+                fun forbid_index_all/1,
+                fun default_and_false_index_all/1,
+                fun warn_index_all/1
+            ]
+        }
+    }.
+
+forbid_index_all({Idx, Db}) ->
+    ?_test(begin

Review Comment:
   Avoid using `?_test(begin ... end)` construct, use the `?TDEF_FE` macro instead. See https://github.com/apache/couchdb/blob/main/src/chttpd/test/eunit/chttpd_dbs_info_test.erl#L70-L83 as an example or search for `?TDEF_FE`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170858100


##########
src/nouveau/src/nouveau_httpd.erl:
##########
@@ -0,0 +1,276 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd).
+
+-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    handle_analyze_req/1,
+    handle_search_req/3,
+    handle_info_req/3,
+    handle_cleanup_req/2
+]).
+
+-import(chttpd, [
+    send_method_not_allowed/2,
+    send_json/2, send_json/3,
+    send_error/2
+]).
+
+-define(RETRY_LIMIT, 20).
+-define(RETRY_SLEEP, 500).
+
+handle_analyze_req(#httpd{method = 'POST'} = Req) ->

Review Comment:
   It would be good to have a basic integration test calling this API.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170868332


##########
src/nouveau/src/nouveau_index_manager.erl:
##########
@@ -0,0 +1,161 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+%% index manager ensures only one process is updating a nouveau index at a time.
+%% calling update_index will block until at least one attempt has been made to
+%% make the index as current as the database at the time update_index was called.
+
+-module(nouveau_index_manager).
+-behaviour(gen_server).
+-behaviour(config_listener).
+-include("nouveau.hrl").
+
+%% public api
+-export([
+    update_index/1
+]).
+
+%% gen_server bits
+-export([
+    start_link/0,
+    init/1,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2
+]).
+
+% config_listener api
+-export([handle_config_change/5, handle_config_terminate/3]).
+
+-export([handle_db_event/3]).
+
+-define(BY_DBSIG, nouveau_by_dbsig).
+-define(BY_REF, nouveau_by_ref).
+
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+update_index(#index{} = Index) ->
+    case nouveau:enabled() of
+        true ->
+            gen_server:call(?MODULE, {update, Index}, infinity);
+        false ->
+            {error, nouveau_not_enabled}
+    end.
+
+init(_) ->
+    couch_util:set_mqd_off_heap(?MODULE),
+    ets:new(?BY_DBSIG, [set, named_table]),
+    ets:new(?BY_REF, [set, named_table]),
+    couch_event:link_listener(?MODULE, handle_db_event, nil, [all_dbs]),
+    configure_ibrowse(nouveau_util:nouveau_url()),
+    ok = config:listen_for_changes(?MODULE, nil),
+    {ok, nil}.
+
+handle_call({update, #index{} = Index0}, From, State) ->
+    DbSig = {Index0#index.dbname, Index0#index.sig},
+    case ets:lookup(?BY_DBSIG, DbSig) of
+        [] ->
+            {_IndexerPid, IndexerRef} = spawn_monitor(nouveau_index_updater, update, [Index0]),
+            Queue = queue:in(From, queue:new()),
+            true = ets:insert(?BY_DBSIG, {DbSig, Index0, Queue}),
+            true = ets:insert(?BY_REF, {IndexerRef, DbSig});
+        [{_DbSig, Index1, Queue}] ->
+            ets:insert(?BY_DBSIG, {DbSig, Index1, queue:in(From, Queue)})
+    end,
+    {noreply, State};
+handle_call(_Msg, _From, State) ->
+    {reply, unexpected_msg, State}.
+
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+handle_info({'DOWN', IndexerRef, process, _Pid, Reason}, State) ->
+    case ets:lookup(?BY_REF, IndexerRef) of
+        [] ->
+            % not one of ours, somehow...

Review Comment:
   Would we ever expect it? May make sense to log an error at least.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170780372


##########
nouveau/README.md:
##########
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on;
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json.yaml}
+
+## What doesn't work yet?
+
+* results grouping
+* configurable stop words for analyzers
+* Makefile.win or Windows generally
+
+I don't intend to add grouping support, it seems barely used. Would accept a tidy contribution, though.
+
+## Why is this better than dreyfus/clouseau?
+
+* No scalang (or Scala!)
+* Supports any version of Java that Lucene 9 supports
+* memory-mapped I/O for performance (which works best on Java 19)
+* direct I/O used for segment merging (so we don't evict useful data from disk cache)
+
+## Getting started
+
+Configure CouchDB with `--enable-nouveau'
+
+Build Nouveau with;
+
+`make`
+
+Run Nouvea with;

Review Comment:
   `Nouvea` -> `Nouveau`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1485747436

   * Nouveau endpoints return 404 if disabled and no attempt is made by the nouveau erlang app to contact the JVM side for background indexing.
   * update=true|false is supported but stable=true|false is not. nouveau always sorts by _id as the tie-breaker so the instability that clouseau exhibits is much reduced.
   * mango integration is done.
   * I've not yet written a new _search_cleanup endpoint but the java side supports it (and is regularly exercised)
   * indexes are deleted if nouveau is up when the database is deleted (same as dreyfus/clouseau)
   * include_docs=true not yet implemented (the README says so)
   * nouveau indexes are independent, same as dreyfus
   * not sure what you mean specifically by "Collation (sort order) checks."
   
   
   
     


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] jaydoane commented on a diff in pull request #4291: Import nouveau

Posted by "jaydoane (via GitHub)" <gi...@apache.org>.
jaydoane commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1153750099


##########
src/chttpd/src/chttpd.erl:
##########
@@ -1118,6 +1124,8 @@ error_info(all_workers_died) ->
         "Nodes are unable to service this "
         "request due to overloading or maintenance mode."
     >>};
+error_info({internal_server_error, Reason}) ->
+    {500, <<"internal server error">>, Reason};

Review Comment:
   I wonder if maybe this _should_ have underscores, since most of the other seem to?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170852313


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),
+    QueryArgs1 = maps:remove(bookmark, QueryArgs0),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = maps:get(limit, QueryArgs0),
+        sort = maps:get(sort, QueryArgs0),
+        counters = Counters,
+        search_results = #{}
+    },
+    try
+        rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, State, infinity, 1000 * 60 * 60)

Review Comment:
   Use a config a macro at least indicating what 1000*60*60 is.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170843618


##########
src/nouveau/src/nouveau_bookmark.erl:
##########
@@ -0,0 +1,68 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_bookmark).
+
+-include_lib("mem3/include/mem3.hrl").
+
+-export([new/0, update/3, unpack/2, pack/1, to_ejson/1]).
+
+new() ->
+    #{}.
+
+%% Form a bookmark from the last contribution from each shard range
+update(DbName, PreviousBookmark, SearchResults) when is_binary(PreviousBookmark) ->
+    update(DbName, unpack(DbName, PreviousBookmark), SearchResults);
+update(DbName, {EJson}, SearchResults) when is_list(EJson) ->
+    update(DbName, from_ejson({EJson}), SearchResults);
+update(DbName, PreviousBookmark, SearchResults) when is_map(PreviousBookmark) ->
+    Hits = maps:get(<<"hits">>, SearchResults),
+    NewBookmark0 = lists:foldl(
+        fun(Hit, Acc) ->
+            maps:put(range_of(DbName, maps:get(<<"id">>, Hit)), maps:get(<<"order">>, Hit), Acc)

Review Comment:
   It's a bit hard to see what's the key and value in the `maps:put` line. Could match `Order` and `Id` in the fun head as `fun(#{<<"id">> := Id, <<"order">> := Order}, Acc)` then do:```Acc#{range_of(DbName, Id) => Hit}```?
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170865112


##########
src/nouveau/src/nouveau_httpd_handlers.erl:
##########
@@ -0,0 +1,35 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_httpd_handlers).
+
+-export([url_handler/1, db_handler/1, design_handler/1]).
+
+url_handler(<<"_nouveau_analyze">>) ->
+    fun nouveau_httpd:handle_analyze_req/1;
+url_handler(_) ->
+    no_match.
+
+db_handler(<<"_nouveau_cleanup">>) ->

Review Comment:
   It may be tricky having a full test here and asserting cleanup happened but at least a basic call and check for a success return code would be good to have.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170784316


##########
nouveau/README.md:
##########
@@ -0,0 +1,118 @@
+# nouveau
+
+Nouveau is a modern replacement for dreyfus/clouseau and is built on;
+
+1) the Dropwizard framework (https://dropwizard.io)
+2) Java 11+
+3) Lucene 9
+
+Nouveau transforms Apache CouchDB databases into Apache Lucene indexes at the shard level and then merges the results together.
+
+This work is currently EXPERIMENTAL and may change in ways that invalidate any existing Nouveau index.
+
+## What works?
+
+* you can define a default analyzer and different analyzers by field name.
+* sorting on text and numbers (and combinations of fields)
+* classic lucene query syntax
+* count and range facets
+* bookmark support for paginating efficiently through large results sets
+* indexes automatically deleted if database is deleted (as long as nouveau is running!)
+* integration with ken
+* integration with mango
+* integration with resharding
+* update=false
+* `_nouveau_info`
+* `_search_cleanup`
+* /openapi.{json.yaml}
+

Review Comment:
   Would basic geo queries work as well as described in https://blog.cloudant.com/2022/06/28/Simple-Geospatial-Queries.html?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170790578


##########
nouveau/TODO:
##########
@@ -0,0 +1,27 @@
+targeted dreyfus feature parity
+
+* pagination (bookmark I guess)
+* grouping
+* faceting
+* partitioned db support
+* ken integration

Review Comment:
   This seems to be at odds with the README which says ken integration is supported.
   
   What about just making the TODO a part of README?. It's developer documentation anyway, users would use look official CouchDB docs most likely.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170811625


##########
src/chttpd/src/chttpd.erl:
##########
@@ -1118,6 +1126,8 @@ error_info(all_workers_died) ->
         "Nodes are unable to service this "
         "request due to overloading or maintenance mode."
     >>};
+error_info({internal_server_error, Reason}) ->
+    {500, <<"internal server error">>, Reason};

Review Comment:
   Agree with @jaydoane and `_` for consistency would make sense.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1117251486


##########
java/nouveau/api/src/main/java/org/apache/couchdb/nouveau/api/After.java:
##########
@@ -0,0 +1,59 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import org.apache.couchdb.nouveau.core.ser.AfterDeserializer;
+import org.apache.couchdb.nouveau.core.ser.AfterSerializer;
+
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
+import io.dropwizard.jackson.JsonSnakeCase;
+
+@JsonSnakeCase
+@JsonSerialize(using = AfterSerializer.class)
+@JsonDeserialize(using = AfterDeserializer.class)
+public class After {
+
+    private Object[] fields;
+
+    public After() {
+    }
+
+    public After(final Object... fields) {
+        this.fields = fields;
+    }
+
+    public Object[] getFields() {
+        return fields;
+    }
+
+    public void setFields(Object[] fields) {
+        this.fields = fields;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder result = new StringBuilder();
+        result.append("After [fields=");

Review Comment:
   Is there a `]` missing?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1117266227


##########
java/nouveau/api/src/main/java/org/apache/couchdb/nouveau/core/Index.java:
##########
@@ -0,0 +1,145 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.core;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
+import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
+import org.apache.couchdb.nouveau.api.IndexInfo;
+import org.apache.couchdb.nouveau.api.SearchRequest;
+import org.apache.couchdb.nouveau.api.SearchResults;
+
+public abstract class Index implements Closeable {
+
+    private long updateSeq;
+
+    private boolean deleteOnClose = false;
+
+    private int refCount = 1;
+
+    private long lastUsed = now();
+
+    protected Index(final long updateSeq) {
+        this.updateSeq = updateSeq;
+    }
+
+    public final IndexInfo info() throws IOException {
+        updateLastUsed();
+        final int numDocs = doNumDocs();
+        return new IndexInfo(updateSeq, numDocs);
+    }
+
+    protected abstract int doNumDocs() throws IOException;
+
+    public final synchronized void update(final String docId, final DocumentUpdateRequest request) throws IOException {
+        assertUpdateSeqIsLower(request.getSeq());
+        updateLastUsed();
+        doUpdate(docId, request);
+        incrementUpdateSeq(request.getSeq());
+    }
+
+    protected abstract void doUpdate(final String docId, final DocumentUpdateRequest request) throws IOException;
+
+    public final synchronized void delete(final String docId, final DocumentDeleteRequest request) throws IOException {
+        assertUpdateSeqIsLower(request.getSeq());
+        updateLastUsed();
+        doDelete(docId, request);
+        incrementUpdateSeq(request.getSeq());
+    }
+
+    protected abstract void doDelete(final String docId, final DocumentDeleteRequest request) throws IOException;
+
+    public final SearchResults search(final SearchRequest request) throws IOException {
+        updateLastUsed();
+        return doSearch(request);
+    }
+
+    protected abstract SearchResults doSearch(final SearchRequest request) throws IOException;
+
+    public final boolean commit() throws IOException {
+        final long updateSeq;
+        synchronized (this) {
+            updateSeq = this.updateSeq;
+        }
+        return doCommit(updateSeq);
+    }
+
+    protected abstract boolean doCommit(final long updateSeq) throws IOException;
+
+    @Override
+    public final void close() throws IOException {
+        decRef();
+    }
+
+    protected abstract void doClose() throws IOException;
+
+    public abstract boolean isOpen();
+
+    public boolean isDeleteOnClose() {
+        return deleteOnClose;
+    }
+
+    public void setDeleteOnClose(final boolean deleteOnClose) {
+        this.deleteOnClose = deleteOnClose;
+    }
+
+    protected final void assertUpdateSeqIsLower(final long updateSeq) throws UpdatesOutOfOrderException {
+        if (!(updateSeq > this.updateSeq)) {
+            throw new UpdatesOutOfOrderException();
+        }
+    }
+
+    protected final void incrementUpdateSeq(final long updateSeq) throws IOException {
+        assertUpdateSeqIsLower(updateSeq);
+        this.updateSeq = updateSeq;
+    }
+
+    private synchronized void updateLastUsed() {
+        this.lastUsed = now();
+    }
+
+    public final synchronized void incRef() {
+        // zero is forever.
+        if (refCount > 0) {
+            refCount++;
+        }
+    }
+
+    public final synchronized int getRefCount() {
+        return refCount;
+    }
+
+    public final boolean decRef() throws IOException {
+        boolean close;
+        synchronized (this) {
+            close = --refCount == 0;
+        }
+        if (close) {
+            doClose();
+        }
+        return close;
+    }
+
+    public synchronized long secondsSinceLastUse() {
+        return TimeUnit.NANOSECONDS.toMillis(now() - lastUsed);
+    }
+
+    private long now() {
+        return System.nanoTime();
+    }
+
+}

Review Comment:
   Small nit: Missing newline at eof...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1117270044


##########
java/nouveau/api/src/test/resources/fixtures/SearchRequest.json:
##########
@@ -0,0 +1,17 @@
+{
+    "query": "*:*",
+    "limit": 10,
+    "sort": null,
+    "counts": [
+        "bar"
+    ],
+    "ranges": {
+        "foo": [
+            {
+                "label": "0 to 100 inc",
+                "min": 0.0,
+                "max": 100.0
+            }
+        ]
+    }
+}

Review Comment:
   nit: missing \n at eof



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1341152139

   API choices.
   
   Currently you can define a nouveau index almost identically to clouseau, the `index()` function behaves the same way, you just need to use "nouveau" as a top-level key instead of "indexes";
   
   clouseau;
   
   ```
   {
     "indexes": {
       "bar": {
         "default_analyzer": "standard",
         "field_analyzers": {
           "foo": "english"
         },
         "index": "function(doc) { index(\"foo\", \"bar\", {\"store\":true}); index(\"bar\", doc.bar, {\"store\":true,\"facet\":true}); }"
       }
     }
   }
   ```
   
   nouveau
   ```
   {
     "nouveau": {
       "bar": {
         "default_analyzer": "standard",
         "field_analyzers": {
           "foo": "english"
         },
         "index": "function(doc) { index(\"foo\", \"bar\", {\"store\":true}); index(\"bar\", doc.bar, {\"store\":true,\"facet\":true}); }"
       }
     }
   }
   ```
   
   the index() function is enhanced for nouveau (https://github.com/rnewson/nouveau#index-function) giving the user better control over the indexing process but the basic shape for clouseau works the same.
   
   The alternative proposal is to use a version attribute
   
   ```
     "indexes": {
       "bar": {
         "version": 2,
         "default_analyzer": "standard",
         "field_analyzers": {
           "foo": "english"
         },
         "index": "function(doc) { index(\"foo\", \"bar\", {\"store\":true}); index(\"bar\", doc.bar, {\"store\":true,\"facet\":true}); }"
       }
     }
   }
   ```
   
   This could either be a `"version": 2` or perhaps a `"engine": "nouveau"`
   
   dreyfus and erlang-side nouveau would be adjusted to take the appropriate subset from the "indexes" object if we went this way.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1040895505


##########
java/nouveau/pom.xml:
##########
@@ -0,0 +1,216 @@
+<!--
+    Copyright 2022 Cloudant. All rights reserved.

Review Comment:
   ah oof, yes, I copied from clouseau's pom.xml, my bad
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] janl commented on a diff in pull request #4291: Import nouveau

Posted by GitBox <gi...@apache.org>.
janl commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1040869629


##########
java/nouveau/pom.xml:
##########
@@ -0,0 +1,216 @@
+<!--
+    Copyright 2022 Cloudant. All rights reserved.

Review Comment:
   there are others, too, just leaving this as a placeholder. We probably also don't need a dedicated LICENSE copy



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1117290057


##########
java/nouveau/server/src/main/java/org/apache/couchdb/nouveau/resources/IndexResource.java:
##########
@@ -0,0 +1,117 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.resources;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+
+import javax.validation.Valid;
+import javax.validation.constraints.NotNull;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+
+import org.apache.couchdb.nouveau.api.DocumentDeleteRequest;
+import org.apache.couchdb.nouveau.api.DocumentUpdateRequest;
+import org.apache.couchdb.nouveau.api.IndexDefinition;
+import org.apache.couchdb.nouveau.api.IndexInfo;
+import org.apache.couchdb.nouveau.api.SearchRequest;
+import org.apache.couchdb.nouveau.api.SearchResults;
+import org.apache.couchdb.nouveau.core.Index;
+import org.apache.couchdb.nouveau.core.IndexManager;
+
+import com.codahale.metrics.annotation.Timed;
+
+@Path("/index/{name}")
+@Consumes(MediaType.APPLICATION_JSON)
+@Produces(MediaType.APPLICATION_JSON)
+public class IndexResource {
+
+    private final IndexManager indexManager;
+
+    public IndexResource(final IndexManager indexManager) {
+        this.indexManager = indexManager;
+    }
+
+    @GET
+    @SuppressWarnings("resource")
+    public IndexInfo indexInfo(@PathParam("name") String name)
+            throws IOException, InterruptedException, ExecutionException {
+        final Index index = indexManager.acquire(name);
+        try {
+            return index.info();
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    @DELETE
+    public void deletePath(@PathParam("name") String path) throws IOException {
+        indexManager.deleteAll(path);
+    }
+
+    @PUT
+    public void createIndex(@PathParam("name") String name, @NotNull @Valid IndexDefinition indexDefinition)
+            throws IOException {
+        indexManager.create(name, indexDefinition);
+    }
+
+    @DELETE
+    @Timed
+    @Path("/doc/{docId}")
+    public void deleteDoc(@PathParam("name") String name, @PathParam("docId") String docId,
+            @NotNull @Valid final DocumentDeleteRequest request)
+            throws IOException, InterruptedException, ExecutionException {
+        final Index index = indexManager.acquire(name);
+        try {
+            index.delete(docId, request);
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    @PUT
+    @Timed
+    @Path("/doc/{docId}")
+    public void updateDoc(@PathParam("name") String name, @PathParam("docId") String docId,
+            @NotNull @Valid final DocumentUpdateRequest request)
+            throws IOException, InterruptedException, ExecutionException {
+        final Index index = indexManager.acquire(name);
+        try {
+            index.update(docId, request);
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+    @POST
+    @Timed
+    @Path("/search")
+    public SearchResults searchIndex(@PathParam("name") String name, @NotNull @Valid SearchRequest request)
+            throws IOException, InterruptedException, ExecutionException {
+        final Index index = indexManager.acquire(name);
+        try {
+            return index.search(request);
+        } finally {
+            indexManager.release(name, index);
+        }
+    }
+
+}

Review Comment:
   small nit: missing \n @ eof



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] big-r81 commented on a diff in pull request #4291: Import nouveau

Posted by "big-r81 (via GitHub)" <gi...@apache.org>.
big-r81 commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1129234203


##########
java/nouveau/lucene9/src/main/java/org/apache/couchdb/nouveau/lucene9/core/IndexableFieldSerializer.java:
##########
@@ -0,0 +1,59 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.lucene9.core;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+
+import com.fasterxml.jackson.core.JsonGenerationException;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+
+public class IndexableFieldSerializer extends StdSerializer<IndexableField> {
+
+    IndexableFieldSerializer() {
+        this(null);
+    }
+
+    IndexableFieldSerializer(Class<IndexableField> vc) {
+        super(vc);
+    }
+
+    @Override
+    public void serialize(final IndexableField field, final JsonGenerator gen, final SerializerProvider provider)
+            throws IOException {
+        if (!(field instanceof StoredField)) {
+            throw new JsonGenerationException(field.getClass() + " not supported", gen);
+        }
+        gen.writeStartObject();
+        gen.writeStringField("@type", "stored");
+        gen.writeStringField("name", field.name());
+        if (field.numericValue() != null) {
+            gen.writeNumberField("value", (double) field.numericValue());
+        } else if (field.stringValue() != null) {
+            gen.writeStringField("value", field.stringValue());
+        } else if (field.binaryValue() != null) {
+            final BytesRef bytesRef = field.binaryValue();
+            gen.writeFieldName("value");
+            gen.writeBinary(bytesRef.bytes, bytesRef.offset, bytesRef.length);
+            gen.writeBooleanField("encoded", true);
+        }
+        gen.writeEndObject();
+    }
+
+}

Review Comment:
   nit: missing empty newline



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170852313


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),
+    QueryArgs1 = maps:remove(bookmark, QueryArgs0),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = maps:get(limit, QueryArgs0),
+        sort = maps:get(sort, QueryArgs0),
+        counters = Counters,
+        search_results = #{}
+    },
+    try
+        rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, State, infinity, 1000 * 60 * 60)

Review Comment:
   Use a macro at least indicating what 1000*60*60 is?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170848858


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),

Review Comment:
   If we're getting the bookmark with maps:get/2 we assume it's always going to be present. That's should be the case here?
   
   Since we intend on removing, then `https://www.erlang.org/doc/man/maps.html#take-2` might work here instead of get and remove.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170848858


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),

Review Comment:
   If we're getting the bookmark with `maps:get/2` we assume it's always going to be present. That's should be the case here?
   
   Also [maps:take/2](https://www.erlang.org/doc/man/maps.html#take-2) might work here for getting and removing at the same time, instead of a `get` followed by a `remove`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170790578


##########
nouveau/TODO:
##########
@@ -0,0 +1,27 @@
+targeted dreyfus feature parity
+
+* pagination (bookmark I guess)
+* grouping
+* faceting
+* partitioned db support
+* ken integration

Review Comment:
   This seems to be at odds with the README which says ken integration is supported.
   
   What about just making the TODO a part of README?. It's developer documentation anyway, users would use the official CouchDB docs most likely.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170797708


##########
nouveau/src/main/java/org/apache/couchdb/nouveau/api/Range.java:
##########
@@ -0,0 +1,145 @@
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.apache.couchdb.nouveau.api;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.PropertyNamingStrategies;
+import com.fasterxml.jackson.databind.annotation.JsonNaming;
+
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+
+@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
+public class Range<T> {
+
+    @NotEmpty
+    private String label;
+
+    @NotNull
+    private T min;
+
+    private boolean minInclusive = true;
+
+    @NotNull
+    private T max;
+
+    private boolean maxInclusive = true;
+
+    public Range() {
+    }
+
+    public Range(String label, T min, boolean minInclusive, T max, boolean maxInclusive) {
+        this.label = label;
+        this.min = min;
+        this.minInclusive = minInclusive;
+        this.max = max;
+        this.maxInclusive = maxInclusive;
+    }
+
+    @JsonProperty
+    public String getLabel() {
+        return label;
+    }
+
+    public void setLabel(String label) {
+        this.label = label;
+    }
+
+    @JsonProperty
+    public T getMin() {
+        return min;
+    }
+
+    public void setMin(T min) {
+        this.min = min;
+    }
+
+    @JsonProperty("min_inclusive")
+    public boolean isMinInclusive() {
+        return minInclusive;
+    }
+
+    public void setMinInclusive(boolean minInclusive) {
+        this.minInclusive = minInclusive;
+    }
+
+    @JsonProperty
+    public T getMax() {
+        return max;
+    }
+
+    public void setMax(T max) {
+        this.max = max;
+    }
+
+    @JsonProperty("max_inclusive")
+    public boolean isMaxInclusive() {
+        return maxInclusive;
+    }
+
+    public void setMaxInclusive(boolean maxInclusive) {
+        this.maxInclusive = maxInclusive;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((label == null) ? 0 : label.hashCode());
+        result = prime * result + ((min == null) ? 0 : min.hashCode());
+        result = prime * result + (minInclusive ? 1231 : 1237);

Review Comment:
   What's the intuition behind using 1231 and 1237 as opposed to 3 and 5 for example?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] nickva commented on a diff in pull request #4291: Import nouveau

Posted by "nickva (via GitHub)" <gi...@apache.org>.
nickva commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1170852313


##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),
+    QueryArgs1 = maps:remove(bookmark, QueryArgs0),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = maps:get(limit, QueryArgs0),
+        sort = maps:get(sort, QueryArgs0),
+        counters = Counters,
+        search_results = #{}
+    },
+    try
+        rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, State, infinity, 1000 * 60 * 60)

Review Comment:
   Use a macro at least indicating what `1000*60*60` refers to? One of those is the global timeout, one is the per-messages but I never remember which is which.



##########
src/nouveau/src/nouveau_fabric_search.erl:
##########
@@ -0,0 +1,213 @@
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+
+%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*-
+
+-module(nouveau_fabric_search).
+
+-export([go/4]).
+
+-include_lib("mem3/include/mem3.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-record(state, {
+    limit,
+    sort,
+    counters,
+    search_results
+}).
+
+go(DbName, GroupId, IndexName, QueryArgs0) when is_binary(GroupId) ->
+    {ok, DDoc} = fabric:open_doc(
+        DbName,
+        <<"_design/", GroupId/binary>>,
+        [ejson_body]
+    ),
+    go(DbName, DDoc, IndexName, QueryArgs0);
+go(DbName, #doc{} = DDoc, IndexName, QueryArgs0) ->
+    {ok, Index} = nouveau_util:design_doc_to_index(DbName, DDoc, IndexName),
+    Shards = mem3:shards(DbName),
+    Bookmark = nouveau_bookmark:unpack(DbName, maps:get(bookmark, QueryArgs0)),
+    QueryArgs1 = maps:remove(bookmark, QueryArgs0),
+    Counters0 = lists:map(
+        fun(#shard{} = Shard) ->
+            After = maps:get(Shard#shard.range, Bookmark, null),
+            Ref = rexi:cast(
+                Shard#shard.node,
+                {nouveau_rpc, search, [Shard#shard.name, Index, QueryArgs1#{'after' => After}]}
+            ),
+            Shard#shard{ref = Ref}
+        end,
+        Shards
+    ),
+    Counters = fabric_dict:init(Counters0, nil),
+    Workers = fabric_dict:fetch_keys(Counters),
+    RexiMon = fabric_util:create_monitors(Workers),
+    State = #state{
+        limit = maps:get(limit, QueryArgs0),
+        sort = maps:get(sort, QueryArgs0),
+        counters = Counters,
+        search_results = #{}
+    },
+    try
+        rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, State, infinity, 1000 * 60 * 60)

Review Comment:
   Use a macro at least indicating what `1000*60*60` refers to? One of those is the global timeout, one is the per-message but I never remember which is which.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] jaydoane commented on a diff in pull request #4291: Import nouveau

Posted by "jaydoane (via GitHub)" <gi...@apache.org>.
jaydoane commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1153745291


##########
configure:
##########
@@ -121,6 +129,7 @@ parse_opts() {
             --dev)
                 WITH_DOCS=0
                 WITH_FAUXTON=0
+                WITH_NOUVEAU=1

Review Comment:
   Are you sure this should default to on? It seems like maybe the least surprising thing would be to require explicitly enabling it with `--enable-nouveau`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on a diff in pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on code in PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#discussion_r1173547536


##########
src/mango/src/mango_idx_nouveau.erl:
##########
@@ -0,0 +1,459 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mango_idx_nouveau).
+
+-export([
+    validate_new/2,
+    validate_fields/1,
+    validate_index_def/1,
+    add/2,
+    remove/2,
+    from_ddoc/1,
+    to_json/1,
+    columns/1,
+    is_usable/3,
+    get_default_field_options/1
+]).
+
+-include_lib("couch/include/couch_db.hrl").
+-include("mango.hrl").
+-include("mango_idx.hrl").
+
+validate_new(#idx{} = Idx, Db) ->
+    {ok, Def} = do_validate(Idx#idx.def),
+    maybe_reject_index_all_req(Def, Db),
+    {ok, Idx#idx{def = Def}}.
+
+validate_index_def(IndexInfo) ->
+    do_validate(IndexInfo).
+
+add(#doc{body = {Props0}} = DDoc, Idx) ->
+    Texts1 =
+        case proplists:get_value(<<"nouveau">>, Props0) of
+            {Texts0} -> Texts0;
+            _ -> []
+        end,
+    NewText = make_text(Idx),
+    Texts2 = lists:keystore(element(1, NewText), 1, Texts1, NewText),
+    Props1 = lists:keystore(<<"nouveau">>, 1, Props0, {<<"nouveau">>, {Texts2}}),
+    {ok, DDoc#doc{body = {Props1}}}.
+
+remove(#doc{body = {Props0}} = DDoc, Idx) ->
+    Texts1 =
+        case proplists:get_value(<<"nouveau">>, Props0) of
+            {Texts0} ->
+                Texts0;
+            _ ->
+                ?MANGO_ERROR({index_not_found, Idx#idx.name})
+        end,
+    Texts2 = lists:keydelete(Idx#idx.name, 1, Texts1),
+    if
+        Texts2 /= Texts1 -> ok;
+        true -> ?MANGO_ERROR({index_not_found, Idx#idx.name})
+    end,
+    Props1 =
+        case Texts2 of
+            [] ->
+                lists:keydelete(<<"nouveau">>, 1, Props0);
+            _ ->
+                lists:keystore(<<"nouveau">>, 1, Props0, {<<"nouveau">>, {Texts2}})
+        end,
+    {ok, DDoc#doc{body = {Props1}}}.
+
+from_ddoc({Props}) ->
+    case lists:keyfind(<<"nouveau">>, 1, Props) of
+        {<<"nouveau">>, {Texts}} when is_list(Texts) ->
+            lists:flatmap(
+                fun({Name, {VProps}}) ->
+                    case validate_ddoc(VProps) of
+                        invalid_ddoc ->
+                            [];
+                        Def ->
+                            I = #idx{
+                                type = <<"nouveau">>,
+                                name = Name,
+                                def = Def
+                            },
+                            [I]
+                    end
+                end,
+                Texts
+            );
+        _ ->
+            []
+    end.
+
+to_json(Idx) ->
+    {[
+        {ddoc, Idx#idx.ddoc},
+        {name, Idx#idx.name},
+        {type, Idx#idx.type},
+        {partitioned, Idx#idx.partitioned},
+        {def, {def_to_json(Idx#idx.def)}}
+    ]}.
+
+columns(Idx) ->
+    {Props} = Idx#idx.def,
+    {<<"fields">>, Fields} = lists:keyfind(<<"fields">>, 1, Props),
+    case Fields of
+        <<"all_fields">> ->
+            all_fields;
+        _ ->
+            {DFProps} = couch_util:get_value(<<"default_field">>, Props, {[]}),
+            Enabled = couch_util:get_value(<<"enabled">>, DFProps, true),
+            Default =
+                case Enabled of
+                    true -> [<<"$default">>];
+                    false -> []
+                end,
+            Default ++
+                lists:map(
+                    fun({FProps}) ->
+                        {_, Name} = lists:keyfind(<<"name">>, 1, FProps),
+                        {_, Type} = lists:keyfind(<<"type">>, 1, FProps),
+                        iolist_to_binary([Name, ":", Type])
+                    end,
+                    Fields
+                )
+    end.
+
+is_usable(_, Selector, _) when Selector =:= {[]} ->
+    false;
+is_usable(Idx, Selector, _) ->
+    case columns(Idx) of
+        all_fields ->
+            true;
+        Cols ->
+            Fields = indexable_fields(Selector),
+            sets:is_subset(sets:from_list(Fields), sets:from_list(Cols))
+    end.
+
+do_validate({Props}) ->
+    {ok, Opts} = mango_opts:validate(Props, opts()),
+    {ok, {Opts}};
+do_validate(Else) ->
+    ?MANGO_ERROR({invalid_index_text, Else}).
+
+def_to_json({Props}) ->
+    def_to_json(Props);
+def_to_json([]) ->
+    [];
+def_to_json([{<<"fields">>, <<"all_fields">>} | Rest]) ->
+    [{<<"fields">>, []} | def_to_json(Rest)];
+def_to_json([{fields, Fields} | Rest]) ->
+    [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)];
+def_to_json([{<<"fields">>, Fields} | Rest]) ->
+    [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)];
+% Don't include partial_filter_selector in the json conversion
+% if its the default value
+def_to_json([{<<"partial_filter_selector">>, {[]}} | Rest]) ->
+    def_to_json(Rest);
+def_to_json([{Key, Value} | Rest]) ->
+    [{Key, Value} | def_to_json(Rest)].
+
+fields_to_json([]) ->
+    [];
+fields_to_json([{[{<<"name">>, Name}, {<<"type">>, Type0}]} | Rest]) ->
+    ok = validate_field_name(Name),
+    Type = validate_field_type(Type0),
+    [{[{Name, Type}]} | fields_to_json(Rest)];
+fields_to_json([{[{<<"type">>, Type0}, {<<"name">>, Name}]} | Rest]) ->
+    ok = validate_field_name(Name),
+    Type = validate_field_type(Type0),
+    [{[{Name, Type}]} | fields_to_json(Rest)].
+
+%% In the future, we can possibly add more restrictive validation.
+%% For now, let's make sure the field name is not blank.
+validate_field_name(<<"">>) ->
+    throw(invalid_field_name);
+validate_field_name(Else) when is_binary(Else) ->
+    ok;
+validate_field_name(_) ->
+    throw(invalid_field_name).
+
+validate_field_type(<<"string">>) ->
+    <<"string">>;
+validate_field_type(<<"number">>) ->
+    <<"number">>;
+validate_field_type(<<"boolean">>) ->
+    <<"boolean">>.
+
+validate_fields(<<"all_fields">>) ->
+    {ok, all_fields};
+validate_fields(Fields) ->
+    try fields_to_json(Fields) of
+        _ ->
+            mango_fields:new(Fields)
+    catch
+        error:function_clause ->
+            ?MANGO_ERROR({invalid_index_fields_definition, Fields});
+        throw:invalid_field_name ->
+            ?MANGO_ERROR({invalid_index_fields_definition, Fields})
+    end.
+
+validate_ddoc(VProps) ->
+    try
+        Def = proplists:get_value(<<"index">>, VProps),
+        validate_index_def(Def),
+        Def
+    catch
+        Error:Reason ->
+            couch_log:error(
+                "Invalid Index Def ~p: Error. ~p, Reason: ~p",
+                [VProps, Error, Reason]
+            ),
+            invalid_ddoc
+    end.
+
+opts() ->
+    [
+        {<<"default_analyzer">>, [
+            {tag, default_analyzer},
+            {optional, true},
+            {default, <<"keyword">>}
+        ]},
+        {<<"default_field">>, [
+            {tag, default_field},
+            {optional, true},
+            {default, {[]}}
+        ]},
+        {<<"partial_filter_selector">>, [
+            {tag, partial_filter_selector},
+            {optional, true},
+            {default, {[]}},
+            {validator, fun mango_opts:validate_selector/1}
+        ]},
+        {<<"selector">>, [
+            {tag, selector},
+            {optional, true},
+            {default, {[]}},
+            {validator, fun mango_opts:validate_selector/1}
+        ]},
+        {<<"fields">>, [
+            {tag, fields},
+            {optional, true},
+            {default, []},
+            {validator, fun ?MODULE:validate_fields/1}
+        ]},
+        {<<"index_array_lengths">>, [
+            {tag, index_array_lengths},
+            {optional, true},
+            {default, true},
+            {validator, fun mango_opts:is_boolean/1}
+        ]}
+    ].
+
+make_text(Idx) ->
+    Text =
+        {[
+            {<<"index">>, Idx#idx.def},
+            {<<"analyzer">>, construct_analyzer(Idx#idx.def)}
+        ]},
+    {Idx#idx.name, Text}.
+
+get_default_field_options(Props) ->
+    Default = couch_util:get_value(default_field, Props, {[]}),
+    case Default of
+        Bool when is_boolean(Bool) ->
+            {Bool, <<"standard">>};
+        {[]} ->
+            {true, <<"standard">>};
+        {Opts} ->
+            Enabled = couch_util:get_value(<<"enabled">>, Opts, true),
+            Analyzer = couch_util:get_value(
+                <<"analyzer">>,
+                Opts,
+                <<"standard">>
+            ),
+            {Enabled, Analyzer}
+    end.
+
+construct_analyzer({Props}) ->
+    DefaultAnalyzer = couch_util:get_value(
+        default_analyzer,
+        Props,
+        <<"keyword">>
+    ),
+    {DefaultField, DefaultFieldAnalyzer} = get_default_field_options(Props),
+    DefaultAnalyzerDef =
+        case DefaultField of
+            true ->
+                [{<<"$default">>, DefaultFieldAnalyzer}];
+            _ ->
+                []
+        end,
+    case DefaultAnalyzerDef of
+        [] ->
+            <<"keyword">>;
+        _ ->
+            {[
+                {<<"name">>, <<"perfield">>},
+                {<<"default">>, DefaultAnalyzer},
+                {<<"fields">>, {DefaultAnalyzerDef}}
+            ]}
+    end.
+
+indexable_fields(Selector) ->
+    TupleTree = mango_selector_text:convert([], Selector),
+    indexable_fields([], TupleTree).
+
+indexable_fields(Fields, {op_and, Args}) when is_list(Args) ->
+    lists:foldl(
+        fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
+        Fields,
+        Args
+    );
+%% For queries that use array element access or $in operations, two
+%% fields get generated by mango_selector_text:convert. At index
+%% definition time, only one field gets defined. In this situation, we
+%% remove the extra generated field so that the index can be used. For
+%% all other situations, we include the fields as normal.
+indexable_fields(
+    Fields,
+    {op_or, [
+        {op_field, Field0},
+        {op_field, {[Name | _], _}} = Field1
+    ]}
+) ->
+    case lists:member(<<"[]">>, Name) of
+        true ->
+            indexable_fields(Fields, {op_field, Field0});
+        false ->
+            Fields1 = indexable_fields(Fields, {op_field, Field0}),
+            indexable_fields(Fields1, Field1)
+    end;
+indexable_fields(Fields, {op_or, Args}) when is_list(Args) ->
+    lists:foldl(
+        fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
+        Fields,
+        Args
+    );
+indexable_fields(Fields, {op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
+    Fields0 = indexable_fields(Fields, ExistsQuery),
+    indexable_fields(Fields0, Arg);
+% forces "$exists" : false to use _all_docs
+indexable_fields(_, {op_not, {_, false}}) ->
+    [];
+indexable_fields(Fields, {op_insert, Arg}) when is_binary(Arg) ->
+    Fields;
+%% fieldname.[]:length is not a user defined field.
+indexable_fields(Fields, {op_field, {[_, <<":length">>], _}}) ->
+    Fields;
+indexable_fields(Fields, {op_field, {Name, _}}) ->
+    [iolist_to_binary(Name) | Fields];
+%% In this particular case, the lucene index is doing a field_exists query
+%% so it is looking at all sorts of combinations of field:* and field.*
+%% We don't add the field because we cannot pre-determine what field will exist.
+%% Hence we just return Fields and make it less restrictive.
+indexable_fields(Fields, {op_fieldname, {_, _}}) ->
+    Fields;
+%% Similar idea to op_fieldname but with fieldname:null
+indexable_fields(Fields, {op_null, {_, _}}) ->
+    Fields;
+indexable_fields(Fields, {op_default, _}) ->
+    [<<"$default">> | Fields].
+
+maybe_reject_index_all_req({Def}, Db) ->
+    DbName = couch_db:name(Db),
+    #user_ctx{name = User} = couch_db:get_user_ctx(Db),
+    Fields = couch_util:get_value(fields, Def),
+    case {Fields, forbid_index_all()} of
+        {all_fields, "true"} ->
+            ?MANGO_ERROR(index_all_disabled);
+        {all_fields, "warn"} ->
+            couch_log:warning(
+                "User ~p is indexing all fields in db ~p",
+                [User, DbName]
+            );
+        _ ->
+            ok
+    end.
+
+forbid_index_all() ->
+    config:get("mango", "index_all_disabled", "false").
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+setup_all() ->
+    Ctx = test_util:start_couch(),
+    meck:expect(
+        couch_log,
+        warning,
+        2,
+        fun(_, _) ->
+            throw({test_error, logged_warning})
+        end
+    ),
+    Ctx.
+
+teardown_all(Ctx) ->
+    meck:unload(),
+    test_util:stop_couch(Ctx).
+
+setup() ->
+    %default index all def that generates {fields, all_fields}
+    Index = #idx{def = {[]}},
+    DbName = <<"testdb">>,
+    UserCtx = #user_ctx{name = <<"u1">>},
+    {ok, Db} = couch_db:clustered_db(DbName, UserCtx),
+    {Index, Db}.
+
+teardown(_) ->
+    ok.
+
+index_all_test_() ->
+    {
+        setup,
+        fun setup_all/0,
+        fun teardown_all/1,
+        {
+            foreach,
+            fun setup/0,
+            fun teardown/1,
+            [
+                fun forbid_index_all/1,
+                fun default_and_false_index_all/1,
+                fun warn_index_all/1
+            ]
+        }
+    }.
+
+forbid_index_all({Idx, Db}) ->
+    ?_test(begin

Review Comment:
   hm, these tests were copied from the idx_text, they're probably not useful in context.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [couchdb] rnewson commented on pull request #4291: Import nouveau

Posted by "rnewson (via GitHub)" <gi...@apache.org>.
rnewson commented on PR #4291:
URL: https://github.com/apache/couchdb/pull/4291#issuecomment-1517689907

   @nickva thank you for the detailed review, I've pushed a commit with fixes for pretty much all of it. It helped find a silly error in _nouveau_analyze (the core of it worked, I somehow mangled the io_lib bit at some point).


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@couchdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org