You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by wi...@apache.org on 2023/10/26 13:30:09 UTC
[couchdb] 01/01: Fix lucene support
This is an automated email from the ASF dual-hosted git repository.
willholley pushed a commit to branch mango-beginswith
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit cdfb26d8d891f995ca739f448b60f8ef5ab760cb
Author: Will Holley <wi...@uk.ibm.com>
AuthorDate: Thu Oct 26 12:42:02 2023 +0000
Fix lucene support
---
src/docs/src/api/database/find.rst | 136 ++++++++++++++++++----------------
src/mango/src/mango_selector_text.erl | 3 +-
src/mango/test/03-operator-test.py | 41 +++++++---
3 files changed, 103 insertions(+), 77 deletions(-)
diff --git a/src/docs/src/api/database/find.rst b/src/docs/src/api/database/find.rst
index d25350708..153aa9a09 100644
--- a/src/docs/src/api/database/find.rst
+++ b/src/docs/src/api/database/find.rst
@@ -673,68 +673,74 @@ In addition, some 'meta' condition operators are available. Some condition
operators accept any valid JSON content as the argument. Other condition
operators require the argument to be in a specific JSON format.
-+---------------+-------------+------------+-----------------------------------+
-| Operator type | Operator | Argument | Purpose |
-+===============+=============+============+===================================+
-| (In)equality | ``$lt`` | Any JSON | The field is less than the |
-| | | | argument. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$lte`` | Any JSON | The field is less than or equal to|
-| | | | the argument. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$eq`` | Any JSON | The field is equal to the argument|
-+---------------+-------------+------------+-----------------------------------+
-| | ``$ne`` | Any JSON | The field is not equal to the |
-| | | | argument. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$gte`` | Any JSON | The field is greater than or equal|
-| | | | to the argument. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$gt`` | Any JSON | The field is greater than the |
-| | | | to the argument. |
-+---------------+-------------+------------+-----------------------------------+
-| Object | ``$exists`` | Boolean | Check whether the field exists or |
-| | | | not, regardless of its value. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$type`` | String | Check the document field's type. |
-| | | | Valid values are ``"null"``, |
-| | | | ``"boolean"``, ``"number"``, |
-| | | | ``"string"``, ``"array"``, and |
-| | | | ``"object"``. |
-+---------------+-------------+------------+-----------------------------------+
-| Array | ``$in`` | Array of | The document field must exist in |
-| | | JSON values| the list provided. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$nin`` | Array of | The document field not must exist |
-| | | JSON values| in the list provided. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$size`` | Integer | Special condition to match the |
-| | | | length of an array field in a |
-| | | | document. Non-array fields cannot |
-| | | | match this condition. |
-+---------------+-------------+------------+-----------------------------------+
-| Miscellaneous | ``$mod`` | [Divisor, | Divisor is a non-zero integer, |
-| | | Remainder] | Remainder is any integer. |
-| | | | Non-integer values result in a |
-| | | | 404. Matches documents where |
-| | | | ``field % Divisor == Remainder`` |
-| | | | is true, and only when the |
-| | | | document field is an integer. |
-+---------------+-------------+------------+-----------------------------------+
-| | ``$regex`` | String | A regular expression pattern to |
-| | | | match against the document field. |
-| | | | Only matches when the field is a |
-| | | | string value and matches the |
-| | | | supplied regular expression. The |
-| | | | matching algorithms are based on |
-| | | | the Perl Compatible Regular |
-| | | | Expression (PCRE) library. For |
-| | | | more information about what is |
-| | | | implemented, see the see the |
-| | | | `Erlang Regular Expression |
-| | | | <http://erlang.org/doc |
-| | | | /man/re.html>`_. |
-+---------------+-------------+------------+-----------------------------------+
++---------------+-----------------+-------------+------------------------------------+
+| Operator type | Operator | Argument | Purpose |
++===============+=================+=============+====================================+
+| (In)equality | ``$lt`` | Any JSON | The field is less than the |
+| | | | argument. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$lte`` | Any JSON | The field is less than or equal to |
+| | | | the argument. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$eq`` | Any JSON | The field is equal to the argument |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$ne`` | Any JSON | The field is not equal to the |
+| | | | argument. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$gte`` | Any JSON | The field is greater than or equal |
+| | | | to the argument. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$gt`` | Any JSON | The field is greater than the |
+| | | | argument. |
++---------------+-----------------+-------------+------------------------------------+
+| Object | ``$exists`` | Boolean | Check whether the field exists or |
+| | | | not, regardless of its value. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$type`` | String | Check the document field's type. |
+| | | | Valid values are ``"null"``, |
+| | | | ``"boolean"``, ``"number"``, |
+| | | | ``"string"``, ``"array"``, and |
+| | | | ``"object"``. |
++---------------+-----------------+-------------+------------------------------------+
+| Array | ``$in`` | Array of | The document field must exist in |
+| | | JSON values | the list provided. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$nin`` | Array of | The document field must not exist |
+| | | JSON values | in the list provided. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$size`` | Integer | Special condition to match the |
+| | | | length of an array field in a |
+| | | | document. Non-array fields cannot |
+| | | | match this condition. |
++---------------+-----------------+-------------+------------------------------------+
+| Miscellaneous | ``$mod`` | [Divisor, | Divisor is a non-zero integer, |
+| | | Remainder] | Remainder is any integer. |
+| | | | Non-integer values result in a |
+| | | | 404. Matches documents where |
+| | | | ``field % Divisor == Remainder`` |
+| | | | is true, and only when the |
+| | | | document field is an integer. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$regex`` | String | A regular expression pattern to |
+| | | | match against the document field. |
+| | | | Only matches when the field is a |
+| | | | string value and matches the |
+| | | | supplied regular expression. The |
+| | | | matching algorithms are based on |
+| | | | the Perl Compatible Regular |
+| | | | Expression (PCRE) library. For |
+| | | | more information about what is |
+| | | | implemented, see the |
+| | | | `Erlang Regular Expression |
+| | | | <http://erlang.org/doc |
+| | | | /man/re.html>`_. |
++---------------+-----------------+-------------+------------------------------------+
+| | ``$beginsWith`` | String | Matches where the document field |
+| | | | begins with the specified prefix |
+| | | | (case-sensitive). If the document |
+| | | | field contains a non-string value, |
+| | | | the document is not matched. |
++---------------+-----------------+-------------+------------------------------------+
.. warning::
Regular expressions do not work with indexes, so they should not be used to
@@ -754,8 +760,10 @@ can itself be another operator with arguments of its own. This enables us to
build up more complex selector expressions.
However, only equality operators such as ``$eq``, ``$gt``, ``$gte``, ``$lt``,
-and ``$lte`` (but not ``$ne``) can be used as the basis of a query. You should
-include at least one of these in a selector.
+``$lte`` and ``$beginsWith`` (but not ``$ne``) can be used as the basis
+of a query that can make efficient use of a ``json`` index. You should
+include at least one of these in a selector, or consider using
+a ``text`` index if more flexibility is required.
For example, if you try to perform a query that attempts to match all documents
that have a field called `afieldname` containing a value that begins with the
diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl
index 4a50ff9ba..7d8f73923 100644
--- a/src/mango/src/mango_selector_text.erl
+++ b/src/mango/src/mango_selector_text.erl
@@ -143,8 +143,9 @@ convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
false -> {op_not, {FieldExists, false}}
end;
convert(Path, {[{<<"$beginsWith">>, Arg}]}) when is_binary(Arg) ->
+ Prefix = mango_util:lucene_escape_query_value(Arg),
Suffix = <<"*">>,
- PrefixSearch = value_str(<<Arg/binary, Suffix/binary>>),
+ PrefixSearch = <<Prefix/binary, Suffix/binary>>,
{op_field, {make_field(Path, Arg), PrefixSearch}};
% We're not checking the actual type here, just looking for
% anything that has a possibility of matching by checking
diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py
index b43aacf5f..3b1a46565 100644
--- a/src/mango/test/03-operator-test.py
+++ b/src/mango/test/03-operator-test.py
@@ -10,12 +10,13 @@
# License for the specific language governing permissions and limitations under
# the License.
+from requests.exceptions import HTTPError
import mango
import unittest
class BaseOperatorTests:
- class Common(object):
+ class Common(unittest.TestCase):
def assertUserIds(self, user_ids, docs):
user_ids_returned = list(d["user_id"] for d in docs)
user_ids.sort()
@@ -142,20 +143,36 @@ class BaseOperatorTests:
self.assertNotIn("twitter", d)
def test_beginswith(self):
- docs = self.db.find({"location.state": {"$beginsWith": "New"}})
- self.assertEqual(len(docs), 2)
- self.assertUserIds([2, 10], docs)
+ cases = [
+ {"prefix": "New", "user_ids": [2, 10]},
+ {
+ # test escaped characters - note the space in the test string
+ "prefix": "New ",
+ "user_ids": [2, 10],
+ },
+ {
+ # non-string values in documents should not match the prefix,
+ # but should not error
+ "prefix": "Foo",
+ "user_ids": [],
+ },
+ {"prefix": " New", "user_ids": []},
+ ]
- # non-string prefixes should return an error
- def test_beginswith_invalid_prefix(self):
- docs = self.db.find({"location.state": {"$beginsWith": 123}})
- self.assertEqual(len(docs), 2)
+ for case in cases:
+ with self.subTest(prefix=case["prefix"]):
+ selector = {"location.state": {"$beginsWith": case["prefix"]}}
+ docs = self.db.find(selector)
+ self.assertEqual(len(docs), len(case["user_ids"]))
+ self.assertUserIds(case["user_ids"], docs)
- # non-string values in documents should not match the prefix,
- # but should not error
+ # non-string prefixes should return an error
def test_beginswith_invalid_prefix(self):
- docs = self.db.find({"user_id": {"$beginsWith": "Foo"}})
- self.assertEqual(len(docs), 0)
+ cases = [123, True, [], {}]
+ for prefix in cases:
+ with self.subTest(prefix=prefix):
+ with self.assertRaises(HTTPError):
+ self.db.find({"location.state": {"$beginsWith": prefix}})
class OperatorJSONTests(mango.UserDocsTests, BaseOperatorTests.Common):