You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fo...@apache.org on 2022/02/11 09:12:30 UTC
[jackrabbit-oak] branch trunk updated: OAK-9691 Improve fulltext query syntax support for ElasticSearch (#489)
This is an automated email from the ASF dual-hosted git repository.
fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 3fffce6 OAK-9691 Improve fulltext query syntax support for ElasticSearch (#489)
3fffce6 is described below
commit 3fffce60331a922084837e9924c139d7eb5496af
Author: Jun Zhang <zj...@adobe.com>
AuthorDate: Fri Feb 11 01:12:22 2022 -0800
OAK-9691 Improve fulltext query syntax support for ElasticSearch (#489)
---
.../lucene/dynamicBoost/DynamicBoostTest.java | 29 ++------
.../index/elastic/query/ElasticRequestHandler.java | 86 ++++++++--------------
.../index/search/spi/query/FulltextIndex.java | 2 +-
.../oak/plugins/index/IndexQueryCommonTest.java | 41 +++++++++++
4 files changed, 77 insertions(+), 81 deletions(-)
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
index 77f7c0d..e0a8f56 100644
--- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
@@ -52,7 +52,6 @@ import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.mount.Mounts;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
-import org.junit.Ignore;
import org.junit.Test;
import ch.qos.logback.classic.Level;
@@ -134,27 +133,6 @@ public class DynamicBoostTest extends AbstractQueryTest {
assertEquals("[]", log);
}
- // verifying fulltext query syntax ability, put here for comparison with dynamic boost and dynamic boost lite
- @Test
- public void testQueryFullTextWithTitle() throws Exception {
- createAssetsIndexAndProperties(false, false);
- prepareTestAssets();
-
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'titleone')]", XPATH, Arrays.asList("/test/asset1"));
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'long')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
- // case insensitive
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'LONG')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
- // wildcard works
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'title*')]", XPATH,
- Arrays.asList("/test/asset1", "/test/asset2", "/test/asset3"));
- // space is treated as AND
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'long titleone')]", XPATH, Arrays.asList("/test/asset1"));
- // OR works
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'long OR titleone')]", XPATH, Arrays.asList("/test/asset1", "/test/asset2"));
- // minus works
- assertQuery("//element(*, dam:Asset)[jcr:contains(., 'long -titleone')]", XPATH, Arrays.asList("/test/asset2"));
- }
-
@Test
public void testQueryDynamicBoostBasic() throws Exception {
createAssetsIndexAndProperties(false, false);
@@ -224,7 +202,12 @@ public class DynamicBoostTest extends AbstractQueryTest {
createAssetsIndexAndProperties(true, true);
prepareTestAssets();
- assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'blu*')", SQL2, Arrays.asList("/test/asset3"));
+ assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'blu?')", SQL2, Arrays.asList("/test/asset3"));
+ assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'bl?e')", SQL2, Arrays.asList("/test/asset3"));
+ assertQuery("select [jcr:path] from [dam:Asset] where contains(*, '?lue')", SQL2, Arrays.asList("/test/asset3"));
+ assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'coff*')", SQL2, Arrays.asList("/test/asset2"));
+ assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 'co*ee')", SQL2, Arrays.asList("/test/asset2"));
+ assertQuery("select [jcr:path] from [dam:Asset] where contains(*, '*ffee')", SQL2, Arrays.asList("/test/asset2"));
}
@Test
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 0960d52..93bb4f7 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -16,12 +16,29 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic.query;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.BiConsumer;
+import java.util.function.BiPredicate;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import javax.jcr.PropertyType;
+
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NByteArrayEntity;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.facets.ElasticFacetProvider;
@@ -34,7 +51,6 @@ import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.BlobByteSource;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.PlanResult;
-import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.QueryConstants;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
@@ -48,13 +64,10 @@ import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.join.ScoreMode;
-import org.elasticsearch.common.Strings;
-import org.elasticsearch.xcontent.XContentBuilder;
-import org.elasticsearch.xcontent.json.JsonXContent;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.client.Request;
+import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder;
@@ -66,6 +79,7 @@ import org.elasticsearch.index.query.NestedQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
@@ -77,26 +91,14 @@ import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xcontent.json.JsonXContent;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import javax.jcr.PropertyType;
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.BiConsumer;
-import java.util.function.BiPredicate;
-import java.util.function.Consumer;
-import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
-
import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
import static org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils.toDoubles;
@@ -113,19 +115,17 @@ import static org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuil
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.JCR_PATH;
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.JCR_SCORE;
import static org.apache.jackrabbit.util.ISO8601.parse;
-import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS;
import static org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery;
-import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
-import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.wrapperQuery;
+import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS;
/**
* Class to map query plans into Elastic request objects.
@@ -779,41 +779,13 @@ public class ElasticRequestHandler {
}
private static QueryBuilder fullTextQuery(String text, String fieldName, PlanResult pr) {
- // default match query are executed in OR, we need to use AND instead to avoid that
- // every document having at least one term in the `text` will match. If there are multiple
- // contains clause they will go to different match queries and will be executed in OR
- if (FieldNames.FULLTEXT.equals(fieldName) && !pr.indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
- MultiMatchQueryBuilder multiMatchQuery = multiMatchQuery(text)
- .operator(Operator.AND)
- .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
- pr.indexingRule.getNodeScopeAnalyzedProps().forEach(pd -> multiMatchQuery.field(pd.name, pd.boost));
- // Add the query for actual fulltext field also. That query would not be boosted
- // and could contain other parts like renditions, node name, etc
- return multiMatchQuery.field(fieldName);
- } else {
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
- // simpleQueryStringQuery does not support leading wildcards whereas it's supported by default in queryStringQuery
- // Not using queryStringQuery by default , since some functional cases break.
- // simpleQueryStringQuery is less Strict, for instance searches for terms starting with / work, whereas
- // with queryStringQuery, they throw an Exception (which ultimately results in an empty result set in oak),
- // so using simpleQueryStringQuery by default would break certain functional cases.
- // So only support this in case any term in the text String actually starts with *
- // For example *hello or Hello *world
- String[] textTerms = text.split(" ");
- boolean allowLeadingWildCard = false;
- for(String textTerm : textTerms) {
- if (textTerm.startsWith("*")) {
- allowLeadingWildCard = true;
- break;
- }
- }
-
- if (allowLeadingWildCard) {
- return queryStringQuery(text).field(fieldName).defaultOperator(Operator.AND);
- } else {
- return simpleQueryStringQuery(text).analyzeWildcard(true).field(fieldName).defaultOperator(Operator.AND);
- }
+ LOG.debug("fullTextQuery for text: '{}', fieldName: '{}'", text, fieldName);
+ QueryStringQueryBuilder queryStringQueryBuilder = queryStringQuery(FulltextIndex.rewriteQueryText(text)).defaultOperator(
+ Operator.AND).type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
+ if (FieldNames.FULLTEXT.equals(fieldName)) {
+ pr.indexingRule.getNodeScopeAnalyzedProps().forEach(pd -> queryStringQueryBuilder.field(pd.name, pd.boost));
}
+ return queryStringQueryBuilder.field(fieldName);
}
private QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr,
diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java
index 277715e..5272338 100644
--- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java
+++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java
@@ -288,7 +288,7 @@ public abstract class FulltextIndex implements AdvancedQueryIndex, QueryIndex, N
/**
* Following logic is taken from org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser#parse(java.lang.String)
*/
- protected static String rewriteQueryText(String textsearch) {
+ public static String rewriteQueryText(String textsearch) {
// replace escaped ' with just '
StringBuilder rewritten = new StringBuilder();
// most query parsers recognize 'AND' and 'NOT' as
diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexQueryCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexQueryCommonTest.java
index 614da4b..670ed15 100644
--- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexQueryCommonTest.java
+++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexQueryCommonTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
@@ -527,6 +528,46 @@ public abstract class IndexQueryCommonTest extends AbstractQueryTest {
}
@Test
+ public void fullTextQueryGeneric() throws Exception {
+ Tree test = root.getTree("/").addChild("test");
+
+ Tree testNodeA = test.addChild("nodea");
+ testNodeA.setProperty("a", "hello");
+ testNodeA.setProperty("b", "ocean");
+
+ Tree testNodeB = test.addChild("nodeb");
+ testNodeB.setProperty("a", "hello world");
+ testNodeB.setProperty("b", "soccer-shoe");
+
+ Tree testNodeC = test.addChild("nodec");
+ testNodeC.setProperty("a", "hello");
+ testNodeC.setProperty("b", "world");
+ root.commit();
+
+ assertEventually(() -> {
+ // case insensitive
+ assertQuery("//*[jcr:contains(., 'WORLD')] ", XPATH, Arrays.asList("/test/nodeb", "/test/nodec"));
+
+ // wildcard
+ assertQuery("//*[jcr:contains(., 'Hell*')] ", XPATH, Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+ assertQuery("//*[jcr:contains(., 'He*o')] ", XPATH, Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+ assertQuery("//*[jcr:contains(., '*llo')] ", XPATH, Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+ assertQuery("//*[jcr:contains(., '?orld')] ", XPATH, Arrays.asList("/test/nodeb", "/test/nodec"));
+ assertQuery("//*[jcr:contains(., 'wo?ld')] ", XPATH, Arrays.asList("/test/nodeb", "/test/nodec"));
+ assertQuery("//*[jcr:contains(., 'worl?')] ", XPATH, Arrays.asList("/test/nodeb", "/test/nodec"));
+
+ // a space between terms is interpreted as AND
+ assertQuery("//*[jcr:contains(., 'hello world')] ", XPATH, Arrays.asList("/test/nodeb", "/test/nodec"));
+
+ // exclude
+ assertQuery("//*[jcr:contains(., 'hello -world')] ", XPATH, Arrays.asList("/test/nodea"));
+
+ // explicit OR
+ assertQuery("//*[jcr:contains(., 'ocean OR world')] ", XPATH, Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+ });
+ }
+
+ @Test
public void testInequalityQuery_native() throws Exception {
Tree test = root.getTree("/").addChild("test");