You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fo...@apache.org on 2022/09/26 09:19:33 UTC

[jackrabbit-oak] branch trunk updated: OAK-9945 - Migrate index creation from Rest High Level Client to the new Java API Client (#709)

This is an automated email from the ASF dual-hosted git repository.

fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 03178055c6 OAK-9945 - Migrate index creation from Rest High Level Client to the new Java API Client (#709)
03178055c6 is described below

commit 03178055c68fe7913286c8a203e6c7459ba1ef88
Author: Nuno Santos <ns...@adobe.com>
AuthorDate: Mon Sep 26 11:19:27 2022 +0200

    OAK-9945 - Migrate index creation from Rest High Level Client to the new Java API Client (#709)
    
    * Migrate index creation from using the deprecated high level REST Java Elasticsearch client to the new Java client.
    
    * Add @NotNull checks.
    
    * Dummy commit to trigger unit tests.
    
    * Update oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
    
    Co-authored-by: Fabrizio Fortino <fa...@gmail.com>
    
    * Add test to check that the configurable index settings are properly set in the index creation request.
    
    Co-authored-by: Fabrizio Fortino <fa...@gmail.com>
---
 .../index/elastic/ElasticIndexDefinition.java      |   8 +-
 .../index/elastic/index/ElasticIndexHelper.java    | 376 +++++++++------------
 .../index/elastic/index/ElasticIndexWriter.java    |  39 ++-
 .../elastic/index/ElasticIndexHelperTest.java      | 123 +++++--
 4 files changed, 290 insertions(+), 256 deletions(-)

diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
index 9fb5c0d06d..2ce461dd00 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
@@ -84,10 +84,12 @@ public class ElasticIndexDefinition extends IndexDefinition {
     /**
      * Boolean property indicating if in-built analyzer should preserve original term
      */
-    private static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
+    public static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
 
-    private static final String SPLIT_ON_CASE_CHANGE = "splitOnCaseChange";
-    private static final String SPLIT_ON_NUMERICS = "splitOnNumerics";
+    public static final String SPLIT_ON_CASE_CHANGE = "splitOnCaseChange";
+    public static final String SPLIT_ON_NUMERICS = "splitOnNumerics";
+
+    public static final String ELASTIKNN = "elastiknn";
 
     private static final String SIMILARITY_TAGS_ENABLED = "similarityTagsEnabled";
     private static final boolean SIMILARITY_TAGS_ENABLED_DEFAULT = true;
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
index b9f74ddb8b..8a520d77df 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
@@ -16,18 +16,25 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elastic.index;
 
+import co.elastic.clients.elasticsearch._types.Time;
+import co.elastic.clients.elasticsearch._types.mapping.Property;
+import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.IndexSettings;
+import co.elastic.clients.json.JsonData;
+import co.elastic.clients.util.ObjectBuilder;
+import jakarta.json.Json;
+import jakarta.json.JsonObject;
+import jakarta.json.JsonValue;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
 import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
-import org.elasticsearch.client.indices.CreateIndexRequest;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.xcontent.XContentBuilder;
-import org.elasticsearch.xcontent.XContentFactory;
+import org.jetbrains.annotations.NotNull;
 
-import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -41,51 +48,65 @@ class ElasticIndexHelper {
 
     // Unset the refresh interval and disable replicas at index creation to optimize for initial loads
     // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html
-    private static final String INITIAL_REFRESH_INTERVAL = "-1";
-    private static final int INITIAL_NUMBER_OF_REPLICAS = 0;
+    private static final Time INITIAL_REFRESH_INTERVAL = Time.of(b -> b.time("-1"));
+    private static final String INITIAL_NUMBER_OF_REPLICAS = "0";
 
     /**
      * Returns a {@code CreateIndexRequest} with settings and mappings translated from the specified {@code ElasticIndexDefinition}.
      * The returned object can be used to create and index optimized for bulk loads (eg: reindexing) but not for queries.
      * To make it usable, a #enableIndexRequest needs to be performed.
+     *
      * @param remoteIndexName the final index name
      * @param indexDefinition the definition used to read settings/mappings
      * @return a {@code CreateIndexRequest}
-     * @throws IOException if an error happens while creating the request
-     *
-     * TODO: index create cannot be migrated to the ES Java client: it does not support custom mappings/settings needed to configure elastiknn.
-     * See discussion in https://discuss.elastic.co/t/elasticsearch-java-client-support-for-custom-mappings-settings/303172
-     * The migration will continue when this roadmap item gets fixed https://github.com/elastic/elasticsearch-java/issues/252
      */
-    public static CreateIndexRequest createIndexRequest(String remoteIndexName, ElasticIndexDefinition indexDefinition) throws IOException {
-        final CreateIndexRequest request = new CreateIndexRequest(remoteIndexName);
-
-        // provision settings
-        request.settings(loadSettings(indexDefinition));
+    public static CreateIndexRequest createIndexRequest(@NotNull String remoteIndexName,
+                                                        @NotNull ElasticIndexDefinition indexDefinition) {
+        return new CreateIndexRequest.Builder()
+                .index(remoteIndexName)
+                .settings(s -> loadSettings(s, indexDefinition))
+                .mappings(s -> loadMappings(s, indexDefinition))
+                .build();
+    }
 
-        // provision mappings
-        final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
-        mappingBuilder.startObject();
-        {
-            mappingBuilder.startObject("properties");
-            {
-                mapInternalProperties(mappingBuilder);
-                mapIndexRules(indexDefinition, mappingBuilder);
-            }
-            mappingBuilder.endObject();
-        }
-        mappingBuilder.endObject();
-        request.mapping(mappingBuilder);
+    private static ObjectBuilder<TypeMapping> loadMappings(@NotNull TypeMapping.Builder builder,
+                                                           @NotNull ElasticIndexDefinition indexDefinition) {
+        mapInternalProperties(builder);
+        mapIndexRules(builder, indexDefinition);
+        return builder;
+    }
 
-        return request;
+    private static void mapInternalProperties(@NotNull TypeMapping.Builder builder) {
+        builder.properties(FieldNames.PATH,
+                        b1 -> b1.keyword(builder3 -> builder3))
+                .properties(FieldNames.ANCESTORS,
+                        b1 -> b1.text(
+                                b2 -> b2.analyzer("ancestor_analyzer")
+                                        .searchAnalyzer("keyword")
+                                        .searchQuoteAnalyzer("keyword")))
+                .properties(FieldNames.PATH_DEPTH,
+                        b1 -> b1.integer(
+                                b2 -> b2.docValues(false)))
+                .properties(FieldNames.FULLTEXT,
+                        b1 -> b1.text(
+                                b2 -> b2.analyzer("oak_analyzer")));
+        // TODO: the mapping below is for features currently not supported. These need to be reviewed
+        // mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
+        //  .field("type", "keyword")
+        //  .endObject();
+        // mappingBuilder.startObject(FieldNames.NULL_PROPS)
+        // .field("type", "keyword")
+        // .endObject();
     }
 
+
     /**
      * Returns a {@code UpdateSettingsRequest} to make an index ready to be queried and updated in near real time.
+     *
      * @param remoteIndexName the final index name (no alias)
      * @param indexDefinition the definition used to read settings/mappings
      * @return an {@code UpdateSettingsRequest}
-     *
+     * <p>
      * TODO: migrate to Elasticsearch Java client when the following issue will be fixed
      * <a href="https://github.com/elastic/elasticsearch-java/issues/283">https://github.com/elastic/elasticsearch-java/issues/283</a>
      */
@@ -99,102 +120,53 @@ class ElasticIndexHelper {
         return request.settings(settingsBuilder);
     }
 
-    private static XContentBuilder loadSettings(ElasticIndexDefinition indexDefinition) throws IOException {
-        final XContentBuilder settingsBuilder = XContentFactory.jsonBuilder();
-        settingsBuilder.startObject();
+
+    private static ObjectBuilder<IndexSettings> loadSettings(@NotNull IndexSettings.Builder builder,
+                                                             @NotNull ElasticIndexDefinition indexDefinition) {
         if (indexDefinition.getSimilarityProperties().size() > 0) {
-            settingsBuilder.field("elastiknn", true);
+            builder.otherSettings(ElasticIndexDefinition.ELASTIKNN, JsonData.of(JsonValue.TRUE));
         }
-        // static setting: cannot be changed after the index gets created
-        settingsBuilder.field("index.number_of_shards", indexDefinition.numberOfShards);
-
-        // dynamic settings: see #enableIndexRequest
-        settingsBuilder.field("index.refresh_interval", INITIAL_REFRESH_INTERVAL);
-        settingsBuilder.field("index.number_of_replicas", INITIAL_NUMBER_OF_REPLICAS);
-        {
-            settingsBuilder.startObject("analysis");
-            {
-                settingsBuilder.startObject("filter");
-                {
-                    settingsBuilder.startObject("oak_word_delimiter_graph_filter");
-                    {
-                        settingsBuilder.field("type", "word_delimiter_graph");
-                        settingsBuilder.field("generate_word_parts", true);
-                        settingsBuilder.field("stem_english_possessive", true);
-                        settingsBuilder.field("generate_number_parts", true);
-                        settingsBuilder.field("split_on_numerics", indexDefinition.analyzerConfigSplitOnNumerics());
-                        settingsBuilder.field("split_on_case_change", indexDefinition.analyzerConfigSplitOnCaseChange());
-                        settingsBuilder.field("preserve_original", indexDefinition.analyzerConfigIndexOriginalTerms());
-                    }
-                    settingsBuilder.endObject();
-
-                    settingsBuilder.startObject("shingle")
-                            .field("type", "shingle")
-                            .field("min_shingle_size", 2)
-                            .field("max_shingle_size", 3)
-                            .endObject();
-                }
-                settingsBuilder.endObject();
+        builder.index(indexBuilder -> indexBuilder
+                        // static setting: cannot be changed after the index gets created
+                        .numberOfShards(Integer.toString(indexDefinition.numberOfShards))
+                        // dynamic settings: see #enableIndexRequest
+                        .refreshInterval(INITIAL_REFRESH_INTERVAL)
+                        .numberOfReplicas(INITIAL_NUMBER_OF_REPLICAS))
+                .analysis(b1 ->
+                        b1.filter("oak_word_delimiter_graph_filter",
+                                        b2 -> b2.definition(
+                                                b3 -> b3.wordDelimiterGraph(
+                                                        wdgBuilder -> wdgBuilder.generateWordParts(true)
+                                                                .stemEnglishPossessive(true)
+                                                                .generateNumberParts(true)
+                                                                .splitOnNumerics(indexDefinition.analyzerConfigSplitOnNumerics())
+                                                                .splitOnCaseChange(indexDefinition.analyzerConfigSplitOnCaseChange())
+                                                                .preserveOriginal(indexDefinition.analyzerConfigIndexOriginalTerms()))
+                                        ))
+                                .filter("shingle",
+                                        b2 -> b2.definition(
+                                                b3 -> b3.shingle(
+                                                        b4 -> b4.minShingleSize("2")
+                                                                .maxShingleSize("3"))))
+                                .analyzer("oak_analyzer",
+                                        b2 -> b2.custom(
+                                                b3 -> b3.tokenizer("standard")
+                                                        .filter("lowercase", "oak_word_delimiter_graph_filter")))
+                                .analyzer("ancestor_analyzer",
+                                        b2 -> b2.custom(
+                                                b3 -> b3.tokenizer("path_hierarchy")))
+                                .analyzer("trigram",
+                                        b2 -> b2.custom(
+                                                b3 -> b3.tokenizer("standard")
+                                                        .filter("lowercase", "shingle")))
 
-                settingsBuilder.startObject("analyzer");
-                {
-                    settingsBuilder.startObject("oak_analyzer");
-                    {
-                        settingsBuilder.field("type", "custom");
-                        settingsBuilder.field("tokenizer", "standard");
-                        settingsBuilder.field("filter", new String[]{"lowercase", "oak_word_delimiter_graph_filter"});
-                    }
-                    settingsBuilder.endObject();
-                    // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
-                    settingsBuilder.startObject("ancestor_analyzer");
-                    {
-                        settingsBuilder.field("type", "custom");
-                        settingsBuilder.field("tokenizer", "path_hierarchy");
-                    }
-                    settingsBuilder.endObject();
+                );
 
-                    settingsBuilder.startObject("trigram")
-                            .field("type", "custom")
-                            .field("tokenizer", "standard")
-                            .array("filter", "lowercase", "shingle")
-                            .endObject();
-                }
-                settingsBuilder.endObject();
-            }
-            settingsBuilder.endObject();
-        }
-        settingsBuilder.endObject();
-        return settingsBuilder;
-    }
-
-    private static void mapInternalProperties(XContentBuilder mappingBuilder) throws IOException {
-        mappingBuilder.startObject(FieldNames.PATH)
-                .field("type", "keyword")
-                .endObject();
-        mappingBuilder.startObject(FieldNames.ANCESTORS)
-                .field("type", "text")
-                .field("analyzer", "ancestor_analyzer")
-                .field("search_analyzer", "keyword")
-                .field("search_quote_analyzer", "keyword")
-                .endObject();
-        mappingBuilder.startObject(FieldNames.PATH_DEPTH)
-                .field("type", "integer")
-                .field("doc_values", false) // no need to sort/aggregate here
-                .endObject();
-        mappingBuilder.startObject(FieldNames.FULLTEXT)
-                .field("type", "text")
-                .field("analyzer", "oak_analyzer")
-                .endObject();
-        // TODO: the mapping below is for features currently not supported. These need to be reviewed
-        // mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
-        //  .field("type", "keyword")
-        //  .endObject();
-        // mappingBuilder.startObject(FieldNames.NULL_PROPS)
-        // .field("type", "keyword")
-        // .endObject();
+        return builder;
     }
 
-    private static void mapIndexRules(ElasticIndexDefinition indexDefinition, XContentBuilder mappingBuilder) throws IOException {
+    private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
+                                      @NotNull ElasticIndexDefinition indexDefinition) {
         checkIndexRules(indexDefinition);
         boolean useInSuggest = false;
         for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) {
@@ -208,107 +180,89 @@ class ElasticIndexHelper {
                 }
             }
 
-            mappingBuilder.startObject(name);
-            {
-                // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
-                if (Type.BINARY.equals(type)) {
-                    mappingBuilder.field("type", "binary");
-                } else if (Type.LONG.equals(type)) {
-                    mappingBuilder.field("type", "long");
-                } else if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) {
-                    mappingBuilder.field("type", "double");
-                } else if (Type.DATE.equals(type)) {
-                    mappingBuilder.field("type", "date");
-                } else if (Type.BOOLEAN.equals(type)) {
-                    mappingBuilder.field("type", "boolean");
+            Property.Builder pBuilder = new Property.Builder();
+            // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
+            if (Type.BINARY.equals(type)) {
+                pBuilder.binary(b -> b);
+            } else if (Type.LONG.equals(type)) {
+                pBuilder.long_(b -> b);
+            } else if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) {
+                pBuilder.double_(b -> b);
+            } else if (Type.DATE.equals(type)) {
+                pBuilder.date(b -> b);
+            } else if (Type.BOOLEAN.equals(type)) {
+                pBuilder.boolean_(b -> b);
+            } else {
+                if (indexDefinition.isAnalyzed(propertyDefinitions)) {
+                    // always add keyword for sorting / faceting as sub-field
+                    pBuilder.text(
+                            b1 -> b1.analyzer("oak_analyzer")
+                                    .fields("keyword",
+                                            b2 -> b2.keyword(
+                                                    b3 -> b3.ignoreAbove(256))));
                 } else {
-                    if (indexDefinition.isAnalyzed(propertyDefinitions)) {
-                        mappingBuilder.field("type", "text");
-                        mappingBuilder.field("analyzer", "oak_analyzer");
-                        // always add keyword for sorting / faceting as sub-field
-                        mappingBuilder.startObject("fields");
-                        {
-                            mappingBuilder.startObject("keyword")
-                                    .field("type", "keyword")
-                                    .field("ignore_above", 256)
-                                    .endObject();
-                        }
-                        mappingBuilder.endObject();
-                    } else {
-                        // always add keyword for sorting / faceting
-                        mappingBuilder
-                                .field("type", "keyword")
-                                .field("ignore_above", 256);
-                    }
+                    // always add keyword for sorting / faceting
+                    pBuilder.keyword(b1 -> b1.ignoreAbove(256));
                 }
             }
-            mappingBuilder.endObject();
-        }
+            builder.properties(name, pBuilder.build());
 
-        mappingBuilder.startObject(FieldNames.SPELLCHECK)
-                .field("type", "text").field("analyzer", "trigram")
-                .endObject();
+            builder.properties(FieldNames.SPELLCHECK,
+                    b1 -> b1.text(
+                            b2 -> b2.analyzer("trigram"))
+            );
 
-        if (useInSuggest) {
-            mappingBuilder.startObject(FieldNames.SUGGEST);
-            {
-                mappingBuilder.field("type", "nested");
-                mappingBuilder.startObject("properties");
-                {
-                    // TODO: evaluate https://www.elastic.co/guide/en/elasticsearch/reference/current/faster-prefix-queries.html
-                    mappingBuilder.startObject("value")
-                            .field("type", "text")
-                            .field("analyzer", "oak_analyzer")
-                            .endObject();
-                }
-                mappingBuilder.endObject();
+            if (useInSuggest) {
+                builder.properties(FieldNames.SUGGEST,
+                        b1 -> b1.nested(
+                                // TODO: evaluate https://www.elastic.co/guide/en/elasticsearch/reference/current/faster-prefix-queries.html
+                                b2 -> b2.properties("value",
+                                        b3 -> b3.text(
+                                                b4 -> b4.analyzer("oak_analyzer")
+                                        )
+                                )
+                        )
+                );
             }
-            mappingBuilder.endObject();
-        }
 
-        for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
-            mappingBuilder.startObject(pd.nodeName);
-            {
-                mappingBuilder.field("type", "nested");
-                mappingBuilder.startObject("properties");
-                {
-                    mappingBuilder.startObject("value")
-                            .field("type", "text")
-                            .field("analyzer", "oak_analyzer")
-                            .endObject();
-                    mappingBuilder.startObject("boost")
-                            .field("type", "double")
-                            .endObject();
-                }
-                mappingBuilder.endObject();
+            for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
+                builder.properties(pd.nodeName,
+                        b1 -> b1.nested(
+                                b2 -> b2.properties("value",
+                                                b3 -> b3.text(
+                                                        b4 -> b4.analyzer("oak_analyzer")))
+                                        .properties("boost",
+                                                b3 -> b3.double_(f -> f)
+                                        )
+                        )
+                );
             }
-            mappingBuilder.endObject();
-        }
 
-        for (PropertyDefinition propertyDefinition : indexDefinition.getSimilarityProperties()) {
-            ElasticPropertyDefinition pd = (ElasticPropertyDefinition) propertyDefinition;
-            int denseVectorSize = pd.getSimilaritySearchDenseVectorSize();
-            mappingBuilder.startObject(FieldNames.createSimilarityFieldName(pd.name));
-            {
-                mappingBuilder.field("type", "elastiknn_dense_float_vector");
-                mappingBuilder.startObject("elastiknn");
-                {
-                    mappingBuilder.field(ES_DENSE_VECTOR_DIM_PROP, denseVectorSize);
-                    mappingBuilder.field("model", "lsh");
-                    mappingBuilder.field("similarity", pd.getSimilaritySearchParameters().getIndexTimeSimilarityFunction());
-                    mappingBuilder.field("L", pd.getSimilaritySearchParameters().getL());
-                    mappingBuilder.field("k", pd.getSimilaritySearchParameters().getK());
-                    mappingBuilder.field("w", pd.getSimilaritySearchParameters().getW());
-                }
-                mappingBuilder.endObject();
+            for (PropertyDefinition propertyDefinition : indexDefinition.getSimilarityProperties()) {
+                ElasticPropertyDefinition pd = (ElasticPropertyDefinition) propertyDefinition;
+                int denseVectorSize = pd.getSimilaritySearchDenseVectorSize();
+                JsonObject value = Json.createObjectBuilder()
+                        .add("type", "elastiknn_dense_float_vector")
+                        .add("elastiknn",
+                                Json.createObjectBuilder()
+                                        .add(ES_DENSE_VECTOR_DIM_PROP, denseVectorSize)
+                                        .add("model", "lsh")
+                                        .add("similarity", pd.getSimilaritySearchParameters().getIndexTimeSimilarityFunction())
+                                        .add("L", pd.getSimilaritySearchParameters().getL())
+                                        .add("k", pd.getSimilaritySearchParameters().getK())
+                                        .add("w", pd.getSimilaritySearchParameters().getW())
+                                        .build()
+                        ).build();
+                builder.properties(FieldNames.createSimilarityFieldName(pd.name),
+                        b1 -> b1._custom("elastiknn_dense_float_vector", value));
             }
-            mappingBuilder.endObject();
-        }
 
-        mappingBuilder.startObject(ElasticIndexDefinition.SIMILARITY_TAGS)
-                .field("type", "text")
-                .field("analyzer", "oak_analyzer")
-                .endObject();
+            builder.properties(ElasticIndexDefinition.SIMILARITY_TAGS,
+                    b1 -> b1.text(
+                            b2 -> b2.analyzer("oak_analyzer")
+                    )
+            );
+        }
     }
 
     // we need to check if in the defined rules there are properties with the same name and different types
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
index aa5526d791..3b4300ad5d 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
@@ -17,11 +17,14 @@
 package org.apache.jackrabbit.oak.plugins.index.elastic.index;
 
 import co.elastic.clients.elasticsearch._types.AcknowledgedResponseBase;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.CreateIndexResponse;
 import co.elastic.clients.elasticsearch.indices.DeleteIndexResponse;
 import co.elastic.clients.elasticsearch.indices.ElasticsearchIndicesClient;
 import co.elastic.clients.elasticsearch.indices.GetAliasResponse;
 import co.elastic.clients.elasticsearch.indices.UpdateAliasesRequest;
 import co.elastic.clients.elasticsearch.indices.UpdateAliasesResponse;
+import co.elastic.clients.json.JsonpUtils;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexNameHelper;
@@ -39,8 +42,6 @@ import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
 import org.elasticsearch.client.IndicesClient;
 import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.indices.CreateIndexRequest;
-import org.elasticsearch.client.indices.CreateIndexResponse;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.xcontent.XContentType;
 import org.jetbrains.annotations.NotNull;
@@ -158,24 +159,23 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
     }
 
     private void provisionIndex() throws IOException {
-        ElasticsearchIndicesClient client = elasticConnection.getClient().indices();
+        final ElasticsearchIndicesClient esClient = elasticConnection.getClient().indices();
         // check if index already exists
-        if(client.exists(i -> i.index(indexName)).value()) {
+        if (esClient.exists(i -> i.index(indexName)).value()) {
             LOG.info("Index {} already exists. Skip index provision", indexName);
             return;
         }
 
-        // create the new index
         final CreateIndexRequest request = ElasticIndexHelper.createIndexRequest(indexName, indexDefinition);
+        if (LOG.isDebugEnabled()) {
+            StringBuilder sb = new StringBuilder();
+            JsonpUtils.toString(request, sb);
+            LOG.debug("Creating Index with request {}", sb);
+        }
+        // create the new index
         try {
-            if (LOG.isDebugEnabled()) {
-                final String requestMsg = Strings.toString(request.toXContent(jsonBuilder(), EMPTY_PARAMS));
-                LOG.debug("Creating Index with request {}", requestMsg);
-            }
-            //TODO migrate index creation and ingestion as well
-            final IndicesClient oldClient = elasticConnection.getOldClient().indices();
-            CreateIndexResponse response = oldClient.create(request, RequestOptions.DEFAULT);
-            LOG.info("Created index {}. Response acknowledged: {}", indexName, response.isAcknowledged());
+            final CreateIndexResponse response = esClient.create(request);
+            LOG.info("Created index {}. Response acknowledged: {}", indexName, response.acknowledged());
             checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName);
         } catch (ElasticsearchStatusException ese) {
             // We already check index existence as first thing in this method, if we get here it means we have got into
@@ -184,7 +184,9 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
             // https://github.com/elastic/elasticsearch/issues/19862
             if (ese.status().getStatus() == 400 && ese.getDetailedMessage().contains("resource_already_exists_exception")) {
                 LOG.warn("Index {} already exists. Ignoring error", indexName);
-            } else throw ese;
+            } else {
+                throw ese;
+            }
         }
     }
 
@@ -202,8 +204,7 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
         }
         IndicesClient oldClient = elasticConnection.getOldClient().indices();
         AcknowledgedResponse response = oldClient.putSettings(request, RequestOptions.DEFAULT);
-        LOG.info("Updated settings for index {}. Response acknowledged: {}",
-                indexName, response.isAcknowledged());
+        LOG.info("Updated settings for index {}. Response acknowledged: {}", indexName, response.isAcknowledged());
         checkResponseAcknowledgement(response, "Update index settings call not acknowledged for index " + indexName);
 
         // update the alias
@@ -238,6 +239,12 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
         }
     }
 
+    /** Throws an {@link IllegalStateException} with {@code exceptionMessage} if {@code response} is not acknowledged. */
+    private void checkResponseAcknowledgement(CreateIndexResponse response, String exceptionMessage) {
+        final boolean acknowledged = response.acknowledged();
+        if (!acknowledged) { throw new IllegalStateException(exceptionMessage); }
+    }
+
     private void deleteOldIndices(ElasticsearchIndicesClient indicesClient, Set<String> indices) throws IOException {
         if (indices.size() == 0)
             return;
diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
index a48594f7b8..801dbf3559 100644
--- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
+++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
@@ -16,26 +16,35 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elastic.index;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
+import co.elastic.clients.elasticsearch._types.analysis.TokenFilter;
+import co.elastic.clients.elasticsearch._types.analysis.TokenFilterDefinition;
+import co.elastic.clients.elasticsearch._types.analysis.WordDelimiterGraphTokenFilter;
+import co.elastic.clients.elasticsearch._types.mapping.Property;
+import co.elastic.clients.elasticsearch._types.mapping.TextProperty;
+import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.IndexSettings;
+import co.elastic.clients.elasticsearch.indices.IndexSettingsAnalysis;
+import co.elastic.clients.json.JsonData;
+import jakarta.json.JsonValue;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
 import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.elasticsearch.client.indices.CreateIndexRequest;
+import org.jetbrains.annotations.NotNull;
 import org.junit.Test;
 
-import java.io.IOException;
 import java.util.Map;
 
+import static org.hamcrest.CoreMatchers.notNullValue;
 import static org.hamcrest.CoreMatchers.is;
-import static org.hamcrest.CoreMatchers.nullValue;
 import static org.hamcrest.MatcherAssert.assertThat;
 
 public class ElasticIndexHelperTest {
 
     @Test
-    public void multiRulesWithSamePropertyNames() throws IOException {
+    public void multiRulesWithSamePropertyNames() {
         IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
         IndexDefinitionBuilder.IndexRule indexRuleA = builder.indexRule("typeA");
         indexRuleA.property("foo").type("String");
@@ -48,17 +57,20 @@ public class ElasticIndexHelperTest {
 
         CreateIndexRequest request = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
 
-        ObjectMapper mapper = new ObjectMapper();
-        Map<String, Object> jsonMap = mapper.readValue(request.mappings().streamInput(), Map.class);
+        TypeMapping fooPropertyMappings = request.mappings();
+        assertThat(fooPropertyMappings, notNullValue());
+        Property fooProperty = fooPropertyMappings.properties().get("foo");
+        assertThat(fooProperty, is(notNullValue()));
+        assertThat(fooProperty._kind(), is(Property.Kind.Text));
+        TextProperty fooTextProperty = fooProperty.text();
 
-        Map fooMapping = (Map) ((Map) jsonMap.get("properties")).get("foo");
-        assertThat(fooMapping.get("type"), is("text"));
-        Map fooKeywordMapping = (Map) ((Map) fooMapping.get("fields")).get("keyword");
-        assertThat(fooKeywordMapping.get("type"), is("keyword"));
+        Property keywordField = fooTextProperty.fields().get("keyword");
+        assertThat(keywordField, is(notNullValue()));
+        assertThat(keywordField._kind(), is(Property.Kind.Keyword));
     }
 
     @Test(expected = IllegalStateException.class)
-    public void multiRulesWithSamePropertyNamesDifferentTypes() throws IOException {
+    public void multiRulesWithSamePropertyNamesDifferentTypes() {
         IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
         IndexDefinitionBuilder.IndexRule indexRuleA = builder.indexRule("typeA");
         indexRuleA.property("foo").type("String");
@@ -67,12 +79,53 @@ public class ElasticIndexHelperTest {
         NodeState nodeState = builder.build();
         ElasticIndexDefinition definition =
                 new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
-
         ElasticIndexHelper.createIndexRequest("prefix.path", definition);
     }
 
+    /** Verifies that configurable index settings are propagated into the create-index request. */
+    @Test
+    public void indexSettingsAreCorrectlySet() {
+        IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+        builder.indexRule("idxRule").property("foo").type("String").useInSimilarity();
+
+        final String expectedNumberOfShards = "2";
+        final boolean expectedIndexOriginalTerm = true;
+        final boolean expectedSplitOnCaseChange = true;
+        final boolean expectedSplitOnNumerics = true;
+
+        Tree analyzer = builder.getBuilderTree().addChild("analyzers");
+        analyzer.setProperty(ElasticIndexDefinition.INDEX_ORIGINAL_TERM, expectedIndexOriginalTerm);
+        analyzer.setProperty(ElasticIndexDefinition.SPLIT_ON_CASE_CHANGE, expectedSplitOnCaseChange);
+        analyzer.setProperty(ElasticIndexDefinition.SPLIT_ON_NUMERICS, expectedSplitOnNumerics);
+        NodeState nodeState = builder.build();
+        @NotNull NodeState defn = nodeState.builder()
+                .setProperty(ElasticIndexDefinition.NUMBER_OF_SHARDS, expectedNumberOfShards)
+                .getNodeState();
+        ElasticIndexDefinition definition = new ElasticIndexDefinition(nodeState, defn, "path", "prefix");
+        CreateIndexRequest req = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
+
+        // Assert every nesting level first so a missing section fails as an assertion, not as an NPE.
+        IndexSettings requestSettings = req.settings();
+        assertThat(requestSettings, notNullValue());
+        IndexSettings indexSettings = requestSettings.index();
+        assertThat(indexSettings, notNullValue());
+        assertThat(indexSettings.numberOfShards(), is(expectedNumberOfShards));
+
+        IndexSettingsAnalysis analysisSettings = requestSettings.analysis();
+        assertThat(analysisSettings, notNullValue());
+        TokenFilter filter = analysisSettings.filter().get("oak_word_delimiter_graph_filter");
+        assertThat(filter, notNullValue());
+        WordDelimiterGraphTokenFilter wdgfDef = filter.definition().wordDelimiterGraph();
+        assertThat(wdgfDef.preserveOriginal(), is(expectedIndexOriginalTerm));
+        assertThat(wdgfDef.splitOnCaseChange(), is(expectedSplitOnCaseChange));
+        assertThat(wdgfDef.splitOnNumerics(), is(expectedSplitOnNumerics));
+        Map<String, JsonData> otherSettings = requestSettings.otherSettings();
+        assertThat(otherSettings.get(ElasticIndexDefinition.ELASTIKNN), notNullValue());
+        assertThat(otherSettings.get(ElasticIndexDefinition.ELASTIKNN).toJson(), is(JsonValue.TRUE));
+    }
+
     @Test
-    public void oakAnalyzer() throws IOException {
+    public void oakAnalyzer() {
         IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
         IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
         indexRule.property("foo").type("String").analyzed();
@@ -85,23 +138,30 @@ public class ElasticIndexHelperTest {
 
         CreateIndexRequest request = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
 
-        assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"), is("false"));
-
-        ObjectMapper mapper = new ObjectMapper();
-        Map<String, Object> jsonMappings = mapper.readValue(request.mappings().streamInput(), Map.class);
-        Map fooMapping = (Map) ((Map) jsonMappings.get("properties")).get("foo");
-        assertThat(fooMapping.get("analyzer"), is("oak_analyzer"));
-        Map barMapping = (Map) ((Map) jsonMappings.get("properties")).get("bar");
-        assertThat(barMapping.get("analyzer"), nullValue());
+        checkAnalyzerPreservesOriginalTerm(request, false);
+
+        TypeMapping fooMappings = request.mappings();
+        assertThat(fooMappings, notNullValue());
+        Property fooProperty = fooMappings.properties().get("foo");
+        assertThat(fooProperty, is(notNullValue()));
+        TextProperty textProperty = fooProperty.text();
+        assertThat(textProperty.analyzer(), is("oak_analyzer"));
+        Property keywordField = textProperty.fields().get("keyword");
+        assertThat(keywordField._kind(), is(Property.Kind.Keyword));
+
+        TypeMapping barMappings = request.mappings();
+        assertThat(barMappings, notNullValue());
+        Property barProperty = barMappings.properties().get("bar");
+        assertThat(barProperty._kind(), is(Property.Kind.Keyword));
     }
 
     @Test
-    public void oakAnalyzerWithOriginalTerm() throws IOException {
+    public void oakAnalyzerWithOriginalTerm() {
         IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
         IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
         indexRule.property("foo").type("String").analyzed();
         Tree analyzer = builder.getBuilderTree().addChild("analyzers");
-        analyzer.setProperty("indexOriginalTerm", "true");
+        analyzer.setProperty(ElasticIndexDefinition.INDEX_ORIGINAL_TERM, "true");
 
         NodeState nodeState = builder.build();
 
@@ -109,8 +169,19 @@ public class ElasticIndexHelperTest {
                 new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
 
         CreateIndexRequest request = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
-
-        assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"), is("true"));
+        checkAnalyzerPreservesOriginalTerm(request, true);
     }
 
+    /** Asserts that the oak word-delimiter-graph filter of {@code request} has preserveOriginal equal to {@code expected}. */
+    private void checkAnalyzerPreservesOriginalTerm(CreateIndexRequest request, boolean expected) {
+        final IndexSettings settings = request.settings();
+        assertThat(settings, notNullValue());
+        final IndexSettingsAnalysis analysis = settings.analysis();
+        assertThat(analysis, notNullValue());
+        final TokenFilter wordDelimiterFilter = analysis.filter().get("oak_word_delimiter_graph_filter");
+        assertThat(wordDelimiterFilter, notNullValue());
+        final TokenFilterDefinition definition = wordDelimiterFilter.definition();
+        assertThat(definition._kind(), is(TokenFilterDefinition.Kind.WordDelimiterGraph));
+        assertThat(definition.wordDelimiterGraph().preserveOriginal(), is(expected));
+    }
 }