You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fo...@apache.org on 2022/09/26 09:19:33 UTC
[jackrabbit-oak] branch trunk updated: OAK-9945 - Migrate index creation from Rest High Level Client to the new Java API Client (#709)
This is an automated email from the ASF dual-hosted git repository.
fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 03178055c6 OAK-9945 - Migrate index creation from Rest High Level Client to the new Java API Client (#709)
03178055c6 is described below
commit 03178055c68fe7913286c8a203e6c7459ba1ef88
Author: Nuno Santos <ns...@adobe.com>
AuthorDate: Mon Sep 26 11:19:27 2022 +0200
OAK-9945 - Migrate index creation from Rest High Level Client to the new Java API Client (#709)
* Migrate index creation from using the deprecated high level REST Java Elasticsearch client to new Java client.
* Add @NotNull checks.
* Dummy commit to trigger unit tests.
* Update oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
Co-authored-by: Fabrizio Fortino <fa...@gmail.com>
* Add test to check that the configurable index settings are properly set in the index creation request.
Co-authored-by: Fabrizio Fortino <fa...@gmail.com>
---
.../index/elastic/ElasticIndexDefinition.java | 8 +-
.../index/elastic/index/ElasticIndexHelper.java | 376 +++++++++------------
.../index/elastic/index/ElasticIndexWriter.java | 39 ++-
.../elastic/index/ElasticIndexHelperTest.java | 123 +++++--
4 files changed, 290 insertions(+), 256 deletions(-)
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
index 9fb5c0d06d..2ce461dd00 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
@@ -84,10 +84,12 @@ public class ElasticIndexDefinition extends IndexDefinition {
/**
* Boolean property indicating if in-built analyzer should preserve original term
*/
- private static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
+ public static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
- private static final String SPLIT_ON_CASE_CHANGE = "splitOnCaseChange";
- private static final String SPLIT_ON_NUMERICS = "splitOnNumerics";
+ public static final String SPLIT_ON_CASE_CHANGE = "splitOnCaseChange";
+ public static final String SPLIT_ON_NUMERICS = "splitOnNumerics";
+
+ public static final String ELASTIKNN = "elastiknn";
private static final String SIMILARITY_TAGS_ENABLED = "similarityTagsEnabled";
private static final boolean SIMILARITY_TAGS_ENABLED_DEFAULT = true;
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
index b9f74ddb8b..8a520d77df 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
@@ -16,18 +16,25 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic.index;
+import co.elastic.clients.elasticsearch._types.Time;
+import co.elastic.clients.elasticsearch._types.mapping.Property;
+import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.IndexSettings;
+import co.elastic.clients.json.JsonData;
+import co.elastic.clients.util.ObjectBuilder;
+import jakarta.json.Json;
+import jakarta.json.JsonObject;
+import jakarta.json.JsonValue;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
-import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.xcontent.XContentBuilder;
-import org.elasticsearch.xcontent.XContentFactory;
+import org.jetbrains.annotations.NotNull;
-import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -41,51 +48,65 @@ class ElasticIndexHelper {
// Unset the refresh interval and disable replicas at index creation to optimize for initial loads
// https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html
- private static final String INITIAL_REFRESH_INTERVAL = "-1";
- private static final int INITIAL_NUMBER_OF_REPLICAS = 0;
+ private static final Time INITIAL_REFRESH_INTERVAL = Time.of(b -> b.time("-1"));
+ private static final String INITIAL_NUMBER_OF_REPLICAS = "0";
/**
* Returns a {@code CreateIndexRequest} with settings and mappings translated from the specified {@code ElasticIndexDefinition}.
* The returned object can be used to create an index optimized for bulk loads (e.g. reindexing) but not for queries.
* To make it usable, a #enableIndexRequest needs to be performed.
+ *
* @param remoteIndexName the final index name
* @param indexDefinition the definition used to read settings/mappings
* @return a {@code CreateIndexRequest}
- * @throws IOException if an error happens while creating the request
- *
- * TODO: index create cannot be migrated to the ES Java client: it does not support custom mappings/settings needed to configure elastiknn.
- * See discussion in https://discuss.elastic.co/t/elasticsearch-java-client-support-for-custom-mappings-settings/303172
- * The migration will continue when this roadmap item gets fixed https://github.com/elastic/elasticsearch-java/issues/252
*/
- public static CreateIndexRequest createIndexRequest(String remoteIndexName, ElasticIndexDefinition indexDefinition) throws IOException {
- final CreateIndexRequest request = new CreateIndexRequest(remoteIndexName);
-
- // provision settings
- request.settings(loadSettings(indexDefinition));
+ public static CreateIndexRequest createIndexRequest(@NotNull String remoteIndexName,
+ @NotNull ElasticIndexDefinition indexDefinition) {
+ return new CreateIndexRequest.Builder()
+ .index(remoteIndexName)
+ .settings(s -> loadSettings(s, indexDefinition))
+ .mappings(s -> loadMappings(s, indexDefinition))
+ .build();
+ }
- // provision mappings
- final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
- mappingBuilder.startObject();
- {
- mappingBuilder.startObject("properties");
- {
- mapInternalProperties(mappingBuilder);
- mapIndexRules(indexDefinition, mappingBuilder);
- }
- mappingBuilder.endObject();
- }
- mappingBuilder.endObject();
- request.mapping(mappingBuilder);
+ private static ObjectBuilder<TypeMapping> loadMappings(@NotNull TypeMapping.Builder builder,
+ @NotNull ElasticIndexDefinition indexDefinition) {
+ mapInternalProperties(builder);
+ mapIndexRules(builder, indexDefinition);
+ return builder;
+ }
- return request;
+ private static void mapInternalProperties(@NotNull TypeMapping.Builder builder) {
+ builder.properties(FieldNames.PATH,
+ b1 -> b1.keyword(builder3 -> builder3))
+ .properties(FieldNames.ANCESTORS,
+ b1 -> b1.text(
+ b2 -> b2.analyzer("ancestor_analyzer")
+ .searchAnalyzer("keyword")
+ .searchQuoteAnalyzer("keyword")))
+ .properties(FieldNames.PATH_DEPTH,
+ b1 -> b1.integer(
+ b2 -> b2.docValues(false)))
+ .properties(FieldNames.FULLTEXT,
+ b1 -> b1.text(
+ b2 -> b2.analyzer("oak_analyzer")));
+ // TODO: the mapping below is for features currently not supported. These need to be reviewed
+ // mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
+ // .field("type", "keyword")
+ // .endObject();
+ // mappingBuilder.startObject(FieldNames.NULL_PROPS)
+ // .field("type", "keyword")
+ // .endObject();
}
+
/**
* Returns a {@code UpdateSettingsRequest} to make an index ready to be queried and updated in near real time.
+ *
* @param remoteIndexName the final index name (no alias)
* @param indexDefinition the definition used to read settings/mappings
* @return an {@code UpdateSettingsRequest}
- *
+ * <p>
* TODO: migrate to the Elasticsearch Java client once the following issue is fixed
* <a href="https://github.com/elastic/elasticsearch-java/issues/283">https://github.com/elastic/elasticsearch-java/issues/283</a>
*/
@@ -99,102 +120,53 @@ class ElasticIndexHelper {
return request.settings(settingsBuilder);
}
- private static XContentBuilder loadSettings(ElasticIndexDefinition indexDefinition) throws IOException {
- final XContentBuilder settingsBuilder = XContentFactory.jsonBuilder();
- settingsBuilder.startObject();
+
+ private static ObjectBuilder<IndexSettings> loadSettings(@NotNull IndexSettings.Builder builder,
+ @NotNull ElasticIndexDefinition indexDefinition) {
if (indexDefinition.getSimilarityProperties().size() > 0) {
- settingsBuilder.field("elastiknn", true);
+ builder.otherSettings(ElasticIndexDefinition.ELASTIKNN, JsonData.of(JsonValue.TRUE));
}
- // static setting: cannot be changed after the index gets created
- settingsBuilder.field("index.number_of_shards", indexDefinition.numberOfShards);
-
- // dynamic settings: see #enableIndexRequest
- settingsBuilder.field("index.refresh_interval", INITIAL_REFRESH_INTERVAL);
- settingsBuilder.field("index.number_of_replicas", INITIAL_NUMBER_OF_REPLICAS);
- {
- settingsBuilder.startObject("analysis");
- {
- settingsBuilder.startObject("filter");
- {
- settingsBuilder.startObject("oak_word_delimiter_graph_filter");
- {
- settingsBuilder.field("type", "word_delimiter_graph");
- settingsBuilder.field("generate_word_parts", true);
- settingsBuilder.field("stem_english_possessive", true);
- settingsBuilder.field("generate_number_parts", true);
- settingsBuilder.field("split_on_numerics", indexDefinition.analyzerConfigSplitOnNumerics());
- settingsBuilder.field("split_on_case_change", indexDefinition.analyzerConfigSplitOnCaseChange());
- settingsBuilder.field("preserve_original", indexDefinition.analyzerConfigIndexOriginalTerms());
- }
- settingsBuilder.endObject();
-
- settingsBuilder.startObject("shingle")
- .field("type", "shingle")
- .field("min_shingle_size", 2)
- .field("max_shingle_size", 3)
- .endObject();
- }
- settingsBuilder.endObject();
+ builder.index(indexBuilder -> indexBuilder
+ // static setting: cannot be changed after the index gets created
+ .numberOfShards(Integer.toString(indexDefinition.numberOfShards))
+ // dynamic settings: see #enableIndexRequest
+ .refreshInterval(INITIAL_REFRESH_INTERVAL)
+ .numberOfReplicas(INITIAL_NUMBER_OF_REPLICAS))
+ .analysis(b1 ->
+ b1.filter("oak_word_delimiter_graph_filter",
+ b2 -> b2.definition(
+ b3 -> b3.wordDelimiterGraph(
+ wdgBuilder -> wdgBuilder.generateWordParts(true)
+ .stemEnglishPossessive(true)
+ .generateNumberParts(true)
+ .splitOnNumerics(indexDefinition.analyzerConfigSplitOnNumerics())
+ .splitOnCaseChange(indexDefinition.analyzerConfigSplitOnCaseChange())
+ .preserveOriginal(indexDefinition.analyzerConfigIndexOriginalTerms()))
+ ))
+ .filter("shingle",
+ b2 -> b2.definition(
+ b3 -> b3.shingle(
+ b4 -> b4.minShingleSize("2")
+ .maxShingleSize("3"))))
+ .analyzer("oak_analyzer",
+ b2 -> b2.custom(
+ b3 -> b3.tokenizer("standard")
+ .filter("lowercase", "oak_word_delimiter_graph_filter")))
+ .analyzer("ancestor_analyzer",
+ b2 -> b2.custom(
+ b3 -> b3.tokenizer("path_hierarchy")))
+ .analyzer("trigram",
+ b2 -> b2.custom(
+ b3 -> b3.tokenizer("standard")
+ .filter("lowercase", "shingle")))
- settingsBuilder.startObject("analyzer");
- {
- settingsBuilder.startObject("oak_analyzer");
- {
- settingsBuilder.field("type", "custom");
- settingsBuilder.field("tokenizer", "standard");
- settingsBuilder.field("filter", new String[]{"lowercase", "oak_word_delimiter_graph_filter"});
- }
- settingsBuilder.endObject();
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
- settingsBuilder.startObject("ancestor_analyzer");
- {
- settingsBuilder.field("type", "custom");
- settingsBuilder.field("tokenizer", "path_hierarchy");
- }
- settingsBuilder.endObject();
+ );
- settingsBuilder.startObject("trigram")
- .field("type", "custom")
- .field("tokenizer", "standard")
- .array("filter", "lowercase", "shingle")
- .endObject();
- }
- settingsBuilder.endObject();
- }
- settingsBuilder.endObject();
- }
- settingsBuilder.endObject();
- return settingsBuilder;
- }
-
- private static void mapInternalProperties(XContentBuilder mappingBuilder) throws IOException {
- mappingBuilder.startObject(FieldNames.PATH)
- .field("type", "keyword")
- .endObject();
- mappingBuilder.startObject(FieldNames.ANCESTORS)
- .field("type", "text")
- .field("analyzer", "ancestor_analyzer")
- .field("search_analyzer", "keyword")
- .field("search_quote_analyzer", "keyword")
- .endObject();
- mappingBuilder.startObject(FieldNames.PATH_DEPTH)
- .field("type", "integer")
- .field("doc_values", false) // no need to sort/aggregate here
- .endObject();
- mappingBuilder.startObject(FieldNames.FULLTEXT)
- .field("type", "text")
- .field("analyzer", "oak_analyzer")
- .endObject();
- // TODO: the mapping below is for features currently not supported. These need to be reviewed
- // mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
- // .field("type", "keyword")
- // .endObject();
- // mappingBuilder.startObject(FieldNames.NULL_PROPS)
- // .field("type", "keyword")
- // .endObject();
+ return builder;
}
- private static void mapIndexRules(ElasticIndexDefinition indexDefinition, XContentBuilder mappingBuilder) throws IOException {
+ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
+ @NotNull ElasticIndexDefinition indexDefinition) {
checkIndexRules(indexDefinition);
boolean useInSuggest = false;
for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) {
@@ -208,107 +180,89 @@ class ElasticIndexHelper {
}
}
- mappingBuilder.startObject(name);
- {
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
- if (Type.BINARY.equals(type)) {
- mappingBuilder.field("type", "binary");
- } else if (Type.LONG.equals(type)) {
- mappingBuilder.field("type", "long");
- } else if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) {
- mappingBuilder.field("type", "double");
- } else if (Type.DATE.equals(type)) {
- mappingBuilder.field("type", "date");
- } else if (Type.BOOLEAN.equals(type)) {
- mappingBuilder.field("type", "boolean");
+ Property.Builder pBuilder = new Property.Builder();
+ // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
+ if (Type.BINARY.equals(type)) {
+ pBuilder.binary(b -> b);
+ } else if (Type.LONG.equals(type)) {
+ pBuilder.long_(b -> b);
+ } else if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) {
+ pBuilder.double_(b -> b);
+ } else if (Type.DATE.equals(type)) {
+ pBuilder.date(b -> b);
+ } else if (Type.BOOLEAN.equals(type)) {
+ pBuilder.boolean_(b -> b);
+ } else {
+ if (indexDefinition.isAnalyzed(propertyDefinitions)) {
+ // always add keyword for sorting / faceting as sub-field
+ pBuilder.text(
+ b1 -> b1.analyzer("oak_analyzer")
+ .fields("keyword",
+ b2 -> b2.keyword(
+ b3 -> b3.ignoreAbove(256))));
} else {
- if (indexDefinition.isAnalyzed(propertyDefinitions)) {
- mappingBuilder.field("type", "text");
- mappingBuilder.field("analyzer", "oak_analyzer");
- // always add keyword for sorting / faceting as sub-field
- mappingBuilder.startObject("fields");
- {
- mappingBuilder.startObject("keyword")
- .field("type", "keyword")
- .field("ignore_above", 256)
- .endObject();
- }
- mappingBuilder.endObject();
- } else {
- // always add keyword for sorting / faceting
- mappingBuilder
- .field("type", "keyword")
- .field("ignore_above", 256);
- }
+ // always add keyword for sorting / faceting
+ pBuilder.keyword(b1 -> b1.ignoreAbove(256));
}
}
- mappingBuilder.endObject();
- }
+ builder.properties(name, pBuilder.build());
- mappingBuilder.startObject(FieldNames.SPELLCHECK)
- .field("type", "text").field("analyzer", "trigram")
- .endObject();
+ builder.properties(FieldNames.SPELLCHECK,
+ b1 -> b1.text(
+ b2 -> b2.analyzer("trigram"))
+ );
- if (useInSuggest) {
- mappingBuilder.startObject(FieldNames.SUGGEST);
- {
- mappingBuilder.field("type", "nested");
- mappingBuilder.startObject("properties");
- {
- // TODO: evaluate https://www.elastic.co/guide/en/elasticsearch/reference/current/faster-prefix-queries.html
- mappingBuilder.startObject("value")
- .field("type", "text")
- .field("analyzer", "oak_analyzer")
- .endObject();
- }
- mappingBuilder.endObject();
+ if (useInSuggest) {
+ builder.properties(FieldNames.SUGGEST,
+ b1 -> b1.nested(
+ // TODO: evaluate https://www.elastic.co/guide/en/elasticsearch/reference/current/faster-prefix-queries.html
+ b2 -> b2.properties("value",
+ b3 -> b3.text(
+ b4 -> b4.analyzer("oak_analyzer")
+ )
+ )
+ )
+ );
}
- mappingBuilder.endObject();
- }
- for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
- mappingBuilder.startObject(pd.nodeName);
- {
- mappingBuilder.field("type", "nested");
- mappingBuilder.startObject("properties");
- {
- mappingBuilder.startObject("value")
- .field("type", "text")
- .field("analyzer", "oak_analyzer")
- .endObject();
- mappingBuilder.startObject("boost")
- .field("type", "double")
- .endObject();
- }
- mappingBuilder.endObject();
+ for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
+ builder.properties(pd.nodeName,
+ b1 -> b1.nested(
+ b2 -> b2.properties("value",
+ b3 -> b3.text(
+ b4 -> b4.analyzer("oak_analyzer")))
+ .properties("boost",
+ b3 -> b3.double_(f -> f)
+ )
+ )
+ );
}
- mappingBuilder.endObject();
- }
- for (PropertyDefinition propertyDefinition : indexDefinition.getSimilarityProperties()) {
- ElasticPropertyDefinition pd = (ElasticPropertyDefinition) propertyDefinition;
- int denseVectorSize = pd.getSimilaritySearchDenseVectorSize();
- mappingBuilder.startObject(FieldNames.createSimilarityFieldName(pd.name));
- {
- mappingBuilder.field("type", "elastiknn_dense_float_vector");
- mappingBuilder.startObject("elastiknn");
- {
- mappingBuilder.field(ES_DENSE_VECTOR_DIM_PROP, denseVectorSize);
- mappingBuilder.field("model", "lsh");
- mappingBuilder.field("similarity", pd.getSimilaritySearchParameters().getIndexTimeSimilarityFunction());
- mappingBuilder.field("L", pd.getSimilaritySearchParameters().getL());
- mappingBuilder.field("k", pd.getSimilaritySearchParameters().getK());
- mappingBuilder.field("w", pd.getSimilaritySearchParameters().getW());
- }
- mappingBuilder.endObject();
+ for (PropertyDefinition propertyDefinition : indexDefinition.getSimilarityProperties()) {
+ ElasticPropertyDefinition pd = (ElasticPropertyDefinition) propertyDefinition;
+ int denseVectorSize = pd.getSimilaritySearchDenseVectorSize();
+ JsonObject value = Json.createObjectBuilder()
+ .add("type", "elastiknn_dense_float_vector")
+ .add("elastiknn",
+ Json.createObjectBuilder()
+ .add(ES_DENSE_VECTOR_DIM_PROP, denseVectorSize)
+ .add("model", "lsh")
+ .add("similarity", pd.getSimilaritySearchParameters().getIndexTimeSimilarityFunction())
+ .add("L", pd.getSimilaritySearchParameters().getL())
+ .add("k", pd.getSimilaritySearchParameters().getK())
+ .add("w", pd.getSimilaritySearchParameters().getW())
+ .build()
+ ).build();
+ builder.properties(FieldNames.createSimilarityFieldName(pd.name),
+ b1 -> b1._custom("elastiknn_dense_float_vector", value));
}
- mappingBuilder.endObject();
- }
- mappingBuilder.startObject(ElasticIndexDefinition.SIMILARITY_TAGS)
- .field("type", "text")
- .field("analyzer", "oak_analyzer")
- .endObject();
+ builder.properties(ElasticIndexDefinition.SIMILARITY_TAGS,
+ b1 -> b1.text(
+ b2 -> b2.analyzer("oak_analyzer")
+ )
+ );
+ }
}
// we need to check if in the defined rules there are properties with the same name and different types
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
index aa5526d791..3b4300ad5d 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
@@ -17,11 +17,14 @@
package org.apache.jackrabbit.oak.plugins.index.elastic.index;
import co.elastic.clients.elasticsearch._types.AcknowledgedResponseBase;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.CreateIndexResponse;
import co.elastic.clients.elasticsearch.indices.DeleteIndexResponse;
import co.elastic.clients.elasticsearch.indices.ElasticsearchIndicesClient;
import co.elastic.clients.elasticsearch.indices.GetAliasResponse;
import co.elastic.clients.elasticsearch.indices.UpdateAliasesRequest;
import co.elastic.clients.elasticsearch.indices.UpdateAliasesResponse;
+import co.elastic.clients.json.JsonpUtils;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexNameHelper;
@@ -39,8 +42,6 @@ import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.IndicesClient;
import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.indices.CreateIndexRequest;
-import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.xcontent.XContentType;
import org.jetbrains.annotations.NotNull;
@@ -158,24 +159,23 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
}
private void provisionIndex() throws IOException {
- ElasticsearchIndicesClient client = elasticConnection.getClient().indices();
+ final ElasticsearchIndicesClient esClient = elasticConnection.getClient().indices();
// check if index already exists
- if(client.exists(i -> i.index(indexName)).value()) {
+ if (esClient.exists(i -> i.index(indexName)).value()) {
LOG.info("Index {} already exists. Skip index provision", indexName);
return;
}
- // create the new index
final CreateIndexRequest request = ElasticIndexHelper.createIndexRequest(indexName, indexDefinition);
+ if (LOG.isDebugEnabled()) {
+ StringBuilder sb = new StringBuilder();
+ JsonpUtils.toString(request, sb);
+ LOG.debug("Creating Index with request {}", sb);
+ }
+ // create the new index
try {
- if (LOG.isDebugEnabled()) {
- final String requestMsg = Strings.toString(request.toXContent(jsonBuilder(), EMPTY_PARAMS));
- LOG.debug("Creating Index with request {}", requestMsg);
- }
- //TODO migrate index creation and ingestion as well
- final IndicesClient oldClient = elasticConnection.getOldClient().indices();
- CreateIndexResponse response = oldClient.create(request, RequestOptions.DEFAULT);
- LOG.info("Created index {}. Response acknowledged: {}", indexName, response.isAcknowledged());
+ final CreateIndexResponse response = esClient.create(request);
+ LOG.info("Created index {}. Response acknowledged: {}", indexName, response.acknowledged());
checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName);
} catch (ElasticsearchStatusException ese) {
// We already check index existence as first thing in this method, if we get here it means we have got into
@@ -184,7 +184,9 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
// https://github.com/elastic/elasticsearch/issues/19862
if (ese.status().getStatus() == 400 && ese.getDetailedMessage().contains("resource_already_exists_exception")) {
LOG.warn("Index {} already exists. Ignoring error", indexName);
- } else throw ese;
+ } else {
+ throw ese;
+ }
}
}
@@ -202,8 +204,7 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
}
IndicesClient oldClient = elasticConnection.getOldClient().indices();
AcknowledgedResponse response = oldClient.putSettings(request, RequestOptions.DEFAULT);
- LOG.info("Updated settings for index {}. Response acknowledged: {}",
- indexName, response.isAcknowledged());
+ LOG.info("Updated settings for index {}. Response acknowledged: {}", indexName, response.isAcknowledged());
checkResponseAcknowledgement(response, "Update index settings call not acknowledged for index " + indexName);
// update the alias
@@ -238,6 +239,12 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
}
}
+ private void checkResponseAcknowledgement(CreateIndexResponse response, String exceptionMessage) {
+ if (!response.acknowledged()) {
+ throw new IllegalStateException(exceptionMessage);
+ }
+ }
+
private void deleteOldIndices(ElasticsearchIndicesClient indicesClient, Set<String> indices) throws IOException {
if (indices.size() == 0)
return;
diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
index a48594f7b8..801dbf3559 100644
--- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
+++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
@@ -16,26 +16,35 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic.index;
-import com.fasterxml.jackson.databind.ObjectMapper;
+import co.elastic.clients.elasticsearch._types.analysis.TokenFilter;
+import co.elastic.clients.elasticsearch._types.analysis.TokenFilterDefinition;
+import co.elastic.clients.elasticsearch._types.analysis.WordDelimiterGraphTokenFilter;
+import co.elastic.clients.elasticsearch._types.mapping.Property;
+import co.elastic.clients.elasticsearch._types.mapping.TextProperty;
+import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
+import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
+import co.elastic.clients.elasticsearch.indices.IndexSettings;
+import co.elastic.clients.elasticsearch.indices.IndexSettingsAnalysis;
+import co.elastic.clients.json.JsonData;
+import jakarta.json.JsonValue;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.elasticsearch.client.indices.CreateIndexRequest;
+import org.jetbrains.annotations.NotNull;
import org.junit.Test;
-import java.io.IOException;
import java.util.Map;
+import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.CoreMatchers.is;
-import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.MatcherAssert.assertThat;
public class ElasticIndexHelperTest {
@Test
- public void multiRulesWithSamePropertyNames() throws IOException {
+ public void multiRulesWithSamePropertyNames() {
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
IndexDefinitionBuilder.IndexRule indexRuleA = builder.indexRule("typeA");
indexRuleA.property("foo").type("String");
@@ -48,17 +57,20 @@ public class ElasticIndexHelperTest {
CreateIndexRequest request = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
- ObjectMapper mapper = new ObjectMapper();
- Map<String, Object> jsonMap = mapper.readValue(request.mappings().streamInput(), Map.class);
+ TypeMapping fooPropertyMappings = request.mappings();
+ assertThat(fooPropertyMappings, notNullValue());
+ Property fooProperty = fooPropertyMappings.properties().get("foo");
+ assertThat(fooProperty, is(notNullValue()));
+ assertThat(fooProperty._kind(), is(Property.Kind.Text));
+ TextProperty fooTextProperty = fooProperty.text();
- Map fooMapping = (Map) ((Map) jsonMap.get("properties")).get("foo");
- assertThat(fooMapping.get("type"), is("text"));
- Map fooKeywordMapping = (Map) ((Map) fooMapping.get("fields")).get("keyword");
- assertThat(fooKeywordMapping.get("type"), is("keyword"));
+ Property keywordField = fooTextProperty.fields().get("keyword");
+ assertThat(keywordField, is(notNullValue()));
+ assertThat(keywordField._kind(), is(Property.Kind.Keyword));
}
@Test(expected = IllegalStateException.class)
- public void multiRulesWithSamePropertyNamesDifferentTypes() throws IOException {
+ public void multiRulesWithSamePropertyNamesDifferentTypes() {
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
IndexDefinitionBuilder.IndexRule indexRuleA = builder.indexRule("typeA");
indexRuleA.property("foo").type("String");
@@ -67,12 +79,53 @@ public class ElasticIndexHelperTest {
NodeState nodeState = builder.build();
ElasticIndexDefinition definition =
new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
-
ElasticIndexHelper.createIndexRequest("prefix.path", definition);
}
+ @Test()
+ public void indexSettingsAreCorrectlySet() {
+ IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+ IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("idxRule");
+ indexRule.property("foo").type("String").useInSimilarity();
+
+ final String expectedNumberOfShards = "2";
+ final boolean expectedIndexOriginalTerm = true;
+ final boolean expectedSplitOnCaseChange = true;
+ final boolean expectedSplitOnNumerics = true;
+
+ Tree analyzer = builder.getBuilderTree().addChild("analyzers");
+ analyzer.setProperty(ElasticIndexDefinition.INDEX_ORIGINAL_TERM, expectedIndexOriginalTerm);
+ analyzer.setProperty(ElasticIndexDefinition.SPLIT_ON_CASE_CHANGE, expectedSplitOnCaseChange);
+ analyzer.setProperty(ElasticIndexDefinition.SPLIT_ON_NUMERICS, expectedSplitOnNumerics);
+
+ NodeState nodeState = builder.build();
+
+ @NotNull NodeState defn = nodeState.builder()
+ .setProperty(ElasticIndexDefinition.NUMBER_OF_SHARDS, expectedNumberOfShards)
+ .getNodeState();
+
+ ElasticIndexDefinition definition =
+ new ElasticIndexDefinition(nodeState, defn, "path", "prefix");
+ CreateIndexRequest req = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
+
+ IndexSettings indexSettings = req.settings().index();
+ assertThat(expectedNumberOfShards, is(indexSettings.numberOfShards()));
+
+ WordDelimiterGraphTokenFilter wdgfDef = req.settings()
+ .analysis()
+ .filter().get("oak_word_delimiter_graph_filter")
+ .definition()
+ .wordDelimiterGraph();
+ assertThat(wdgfDef.preserveOriginal(), is(expectedIndexOriginalTerm));
+ assertThat(wdgfDef.splitOnCaseChange(), is(expectedSplitOnCaseChange));
+ assertThat(wdgfDef.splitOnNumerics(), is(expectedSplitOnNumerics));
+
+ Map<String, JsonData> otherSettings = req.settings().otherSettings();
+ assertThat(otherSettings.get(ElasticIndexDefinition.ELASTIKNN).toJson(), is(JsonValue.TRUE));
+ }
+
@Test
- public void oakAnalyzer() throws IOException {
+ public void oakAnalyzer() {
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
indexRule.property("foo").type("String").analyzed();
@@ -85,23 +138,30 @@ public class ElasticIndexHelperTest {
CreateIndexRequest request = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
- assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"), is("false"));
-
- ObjectMapper mapper = new ObjectMapper();
- Map<String, Object> jsonMappings = mapper.readValue(request.mappings().streamInput(), Map.class);
- Map fooMapping = (Map) ((Map) jsonMappings.get("properties")).get("foo");
- assertThat(fooMapping.get("analyzer"), is("oak_analyzer"));
- Map barMapping = (Map) ((Map) jsonMappings.get("properties")).get("bar");
- assertThat(barMapping.get("analyzer"), nullValue());
+ checkAnalyzerPreservesOriginalTerm(request, false);
+
+ TypeMapping fooMappings = request.mappings();
+ assertThat(fooMappings, notNullValue());
+ Property fooProperty = fooMappings.properties().get("foo");
+ assertThat(fooProperty, is(notNullValue()));
+ TextProperty textProperty = fooProperty.text();
+ assertThat(textProperty.analyzer(), is("oak_analyzer"));
+ Property keywordField = textProperty.fields().get("keyword");
+ assertThat(keywordField._kind(), is(Property.Kind.Keyword));
+
+ TypeMapping barMappings = request.mappings();
+ assertThat(barMappings, notNullValue());
+ Property barProperty = barMappings.properties().get("bar");
+ assertThat(barProperty._kind(), is(Property.Kind.Keyword));
}
@Test
- public void oakAnalyzerWithOriginalTerm() throws IOException {
+ public void oakAnalyzerWithOriginalTerm() {
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
indexRule.property("foo").type("String").analyzed();
Tree analyzer = builder.getBuilderTree().addChild("analyzers");
- analyzer.setProperty("indexOriginalTerm", "true");
+ analyzer.setProperty(ElasticIndexDefinition.INDEX_ORIGINAL_TERM, "true");
NodeState nodeState = builder.build();
@@ -109,8 +169,19 @@ public class ElasticIndexHelperTest {
new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
CreateIndexRequest request = ElasticIndexHelper.createIndexRequest("prefix.path", definition);
-
- assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"), is("true"));
+ checkAnalyzerPreservesOriginalTerm(request, true);
}
+ private void checkAnalyzerPreservesOriginalTerm(CreateIndexRequest request, boolean expected) {
+ IndexSettings requestSettings = request.settings();
+ assertThat(requestSettings, notNullValue());
+ IndexSettingsAnalysis analysisSettings = requestSettings.analysis();
+ assertThat(analysisSettings, notNullValue());
+ TokenFilter filter = analysisSettings.filter().get("oak_word_delimiter_graph_filter");
+ assertThat(filter, notNullValue());
+ TokenFilterDefinition tokenFilterDefinition = filter.definition();
+ assertThat(tokenFilterDefinition._kind(), is(TokenFilterDefinition.Kind.WordDelimiterGraph));
+ WordDelimiterGraphTokenFilter wdg = tokenFilterDefinition.wordDelimiterGraph();
+ assertThat(wdg.preserveOriginal(), is(expected));
+ }
}