You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fo...@apache.org on 2022/09/23 07:34:03 UTC

[jackrabbit-oak] branch trunk updated: OAK-9948 - Migrate search suggestions request from RHLC to new Java Client (#711)

This is an automated email from the ASF dual-hosted git repository.

fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 46c0f6de24 OAK-9948 - Migrate search suggestions request from RHLC to new Java Client (#711)
46c0f6de24 is described below

commit 46c0f6de2488f85b51f1139ff6cc2a4dd4b25311
Author: Nuno Santos <ns...@adobe.com>
AuthorDate: Fri Sep 23 09:33:58 2022 +0200

    OAK-9948 - Migrate search suggestions request from RHLC to new Java Client (#711)
    
    * Migrate Elasticsearch usage of HLRC to new Java Client in call to suggestions for spellchecking and in a test for similarity.
    
    * Force a re-run of the PR
    
    * Dummy commit to trigger unit tests.
    
    * Add a comment with the Elasticsearch Java Client GitHub issue describing the bug with the phrase suggestion query without the highlight field.
---
 .../index/elastic/query/ElasticRequestHandler.java |  7 ++++
 .../elastic/query/ElasticSpellcheckIterator.java   | 37 +++++++---------
 .../index/elastic/ElasticSimilarQueryTest.java     | 49 ++++++++++++----------
 .../elastic/index/ElasticIndexHelperTest.java      |  1 -
 4 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 4d50d6b662..c9c6606a29 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -445,6 +445,13 @@ public class ElasticRequestHandler {
         return PhraseSuggester.of(ps -> ps
                 .field(FieldNames.SPELLCHECK)
                 .size(10)
+                // The Elasticsearch Java client fails parsing a response to suggest queries if the highlight is not set.
+                // Caused by: co.elastic.clients.util.MissingRequiredPropertyException: Missing required property 'PhraseSuggestOption.highlighted'
+                //	at co.elastic.clients.util.ApiTypeHelper.requireNonNull(ApiTypeHelper.java:76)
+                //	at co.elastic.clients.elasticsearch.core.search.PhraseSuggestOption.<init>(PhraseSuggestOption.java:64)
+                // Happens with Elasticsearch server 8.4.2 and client 7.17.6
+                // https://github.com/elastic/elasticsearch-java/issues/404
+                .highlight(f -> f.preTag("").postTag(""))
                 .directGenerator(d -> d.field(FieldNames.SPELLCHECK).suggestMode(SuggestMode.Missing).size(10))
                 .collate(c -> c.query(q -> q.source(ElasticIndexUtils.toString(query))))
         );
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckIterator.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckIterator.java
index f4d3c3d8e9..db13bcade9 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckIterator.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckIterator.java
@@ -21,13 +21,12 @@ import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
 
+import co.elastic.clients.elasticsearch.ElasticsearchClient;
+import co.elastic.clients.elasticsearch.core.SearchResponse;
+import co.elastic.clients.elasticsearch.core.search.PhraseSuggestOption;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexNode;
-import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex.FulltextResultRow;
-import org.elasticsearch.client.Request;
-import org.elasticsearch.client.Response;
 import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,7 +55,7 @@ import co.elastic.clients.elasticsearch.core.search.Hit;
  */
 class ElasticSpellcheckIterator implements Iterator<FulltextResultRow> {
 
-    private static final  Logger LOG = LoggerFactory.getLogger(ElasticSpellcheckIterator.class);
+    private static final Logger LOG = LoggerFactory.getLogger(ElasticSpellcheckIterator.class);
     protected static final String SPELLCHECK_PREFIX = "spellcheck?term=";
 
     private static final ObjectMapper JSON_MAPPER;
@@ -137,28 +136,24 @@ class ElasticSpellcheckIterator implements Iterator<FulltextResultRow> {
 
     }
 
-    /**
-     * TODO: this query still uses the old RHLC because of this bug in the Elasticsearch Java client
-     * <a href="https://github.com/elastic/elasticsearch-java/issues/214">https://github.com/elastic/elasticsearch-java/issues/214</a>
-     * Migrate when resolved
-     */
     private Stream<String> suggestions() throws IOException {
         SearchRequest searchReq = SearchRequest.of(sr -> sr
                 .index(indexNode.getDefinition().getIndexAlias())
-                .suggest(sb -> sb.text(spellCheckQuery)
-                        .suggesters("oak:suggestion", fs -> fs.phrase(requestHandler.suggestQuery()))
+                .suggest(sb -> sb
+                        .text(spellCheckQuery)
+                        .suggesters("oak:suggestion",
+                                fs -> fs.phrase(requestHandler.suggestQuery()))
                 ));
 
-        String endpoint = "/" + String.join(",", searchReq.index()) + "/_search?filter_path=suggest";
-        Request request = new Request("POST", endpoint);
-        request.setJsonEntity(ElasticIndexUtils.toString(searchReq));
+        ElasticsearchClient esClient = indexNode.getConnection().getClient();
 
-        Response searchRes = indexNode.getConnection().getOldClient().getLowLevelClient().performRequest(request);
-        ObjectNode responseNode = JSON_MAPPER.readValue(searchRes.getEntity().getContent(), ObjectNode.class);
+        SearchResponse<ObjectNode> searchRes = esClient.search(searchReq, ObjectNode.class);
 
-        return StreamSupport
-                .stream(responseNode.get("suggest").get("oak:suggestion").spliterator(), false)
-                .flatMap(node -> StreamSupport.stream(node.get("options").spliterator(), false))
-                .map(node -> node.get("text").asText());
+        return searchRes
+                .suggest()
+                .get("oak:suggestion")
+                .stream()
+                .flatMap(node -> node.phrase().options().stream())
+                .map(PhraseSuggestOption::text);
     }
 }
diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java
index eaaa34d3b9..e01dafe289 100644
--- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java
+++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java
@@ -16,6 +16,11 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elastic;
 
+import co.elastic.clients.elasticsearch._types.mapping.FieldMapping;
+import co.elastic.clients.elasticsearch._types.mapping.Property;
+import co.elastic.clients.elasticsearch.indices.GetFieldMappingResponse;
+import co.elastic.clients.elasticsearch.indices.get_field_mapping.TypeFieldMappings;
+import jakarta.json.JsonObject;
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.Tree;
@@ -23,9 +28,6 @@ import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.indices.GetFieldMappingsRequest;
-import org.elasticsearch.client.indices.GetFieldMappingsResponse;
 import org.junit.Test;
 
 import java.io.ByteArrayInputStream;
@@ -134,7 +136,7 @@ public class ElasticSimilarQueryTest extends ElasticAbstractQueryTest {
         String nativeQueryStringWithStopWords = "select [jcr:path] from [nt:base] where " +
                 "native('elastic-sim', 'mlt?stream.body=/test/a&mlt.fl=:path&mlt.mindf=0&mlt.mintf=0&mlt.stopwords=Hello,bye')";
 
-        String nativeQueryStringWithoutStopWords =  "select [jcr:path] from [nt:base] where " +
+        String nativeQueryStringWithoutStopWords = "select [jcr:path] from [nt:base] where " +
                 "native('elastic-sim', 'mlt?stream.body=/test/a&mlt.fl=:path&mlt.mindf=0&mlt.mintf=0&mlt.minshouldmatch=20%')";
 
         Tree test = root.getTree("/").addChild("test");
@@ -185,8 +187,8 @@ public class ElasticSimilarQueryTest extends ElasticAbstractQueryTest {
         createIndex(false);
         Tree test = root.getTree("/").addChild("test");
         Tree longPath = test.addChild("a");
-        for (int i = 0; i < 258; i ++) {
-            longPath = longPath.addChild("a"+i);
+        for (int i = 0; i < 258; i++) {
+            longPath = longPath.addChild("a" + i);
         }
         longPath.setProperty("text", "Hello World Hello World");
         test.addChild("b").setProperty("text", "Hello World");
@@ -243,22 +245,23 @@ public class ElasticSimilarQueryTest extends ElasticAbstractQueryTest {
         setIndex(indexName, builder);
         root.commit();
 
-        // TODO: migrate when https://github.com/elastic/elasticsearch-java/issues/249 gets fixed
-        String alias =  ElasticIndexNameHelper.getElasticSafeIndexName(esConnection.getIndexPrefix(), "/oak:index/" + indexName);
-        GetFieldMappingsRequest fieldMappingsRequest = new GetFieldMappingsRequest();
-        fieldMappingsRequest.indices(alias).fields(similarityFieldName1);
-        GetFieldMappingsResponse mappingsResponse = esConnection.getOldClient().indices().
-                getFieldMapping(fieldMappingsRequest, RequestOptions.DEFAULT);
-        final Map<String, Map<String, GetFieldMappingsResponse.FieldMappingMetadata>> mappings =
-                mappingsResponse.mappings();
-        assertEquals("More than one index found", 1, mappings.keySet().size());
-        @SuppressWarnings("unchecked")
-        Map<String, Object> map1 = (Map<String, Object>)(((Map<String, Object>)mappings.entrySet().iterator().next().getValue().
-                get(similarityFieldName1).sourceAsMap().get(similarityFieldName1)).get("elastiknn"));
-        assertEquals("Dense vector size doesn't match", 2048, (int)map1.get("dims"));
-        assertEquals("Similarity doesn't match", "cosine", map1.get("similarity"));
-        assertEquals("Similarity doesn't match", 10, map1.get("L"));
-        assertEquals("Similarity doesn't match", 12, map1.get("k"));
+        String alias = ElasticIndexNameHelper.getElasticSafeIndexName(esConnection.getIndexPrefix(), "/oak:index/" + indexName);
+        GetFieldMappingResponse mappingsResponse = esConnection.getClient()
+                .indices()
+                .getFieldMapping(b -> b
+                        .index(alias)
+                        .fields(similarityFieldName1)
+                );
+
+        Map<String, TypeFieldMappings> mappings = mappingsResponse.result();
+        assertEquals("More than one index found", 1, mappings.size());
+        Map<String, FieldMapping> typeFieldMappings = mappings.entrySet().iterator().next().getValue().mappings();
+        Property v = typeFieldMappings.get(similarityFieldName1).mapping().get(similarityFieldName1);
+        JsonObject map1 = v._custom().toJson().asJsonObject().get("elastiknn").asJsonObject();
+        assertEquals("Dense vector size doesn't match", 2048, map1.getInt("dims"));
+        assertEquals("Similarity doesn't match", "cosine", map1.getString("similarity"));
+        assertEquals("Similarity doesn't match", 10, map1.getInt("L"));
+        assertEquals("Similarity doesn't match", 12, map1.getInt("k"));
     }
 
     @Test
@@ -358,7 +361,7 @@ public class ElasticSimilarQueryTest extends ElasticAbstractQueryTest {
                     found.add(next);
                     resultNum++;
                 }
-                double per = (expectedList.stream().filter(found::contains).count() * 100.0)/expectedList.size();
+                double per = (expectedList.stream().filter(found::contains).count() * 100.0) / expectedList.size();
                 assertEquals("expected: " + expectedList + " got: " + found, expected, per, delta);
             });
         }
diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
index 5103c1bbc4..a48594f7b8 100644
--- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
+++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
@@ -65,7 +65,6 @@ public class ElasticIndexHelperTest {
         IndexDefinitionBuilder.IndexRule indexRuleB = builder.indexRule("typeB");
         indexRuleB.property("foo").type("Boolean");
         NodeState nodeState = builder.build();
-
         ElasticIndexDefinition definition =
                 new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");