You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fo...@apache.org on 2020/11/13 11:06:11 UTC

svn commit: r1883383 - in /jackrabbit/oak/trunk: oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ oak-search-elastic/src/main/java/...

Author: fortino
Date: Fri Nov 13 11:06:10 2020
New Revision: 1883383

URL: http://svn.apache.org/viewvc?rev=1883383&view=rev
Log:
OAK-9276: oak-search-elastic -> make shards/replicas configurable

Modified:
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexDescendantSpellcheckCommonTest.java

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java?rev=1883383&r1=1883382&r2=1883383&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java Fri Nov 13 11:06:10 2020
@@ -51,6 +51,12 @@ public class ElasticIndexDefinition exte
     public static final String BULK_RETRIES_BACKOFF = "bulkRetriesBackoff";
     public static final long BULK_RETRIES_BACKOFF_DEFAULT = 200;
 
+    public static final String NUMBER_OF_SHARDS = "numberOfShards";
+    public static final int NUMBER_OF_SHARDS_DEFAULT = 1;
+
+    public static final String NUMBER_OF_REPLICAS = "numberOfReplicas";
+    public static final int NUMBER_OF_REPLICAS_DEFAULT = 1;
+
     /**
      * Hidden property for storing a seed value to be used as suffix in remote index name.
      */
@@ -87,6 +93,8 @@ public class ElasticIndexDefinition exte
     public final int bulkRetries;
     public final long bulkRetriesBackoff;
     private final String remoteAlias;
+    public final int numberOfShards;
+    public final int numberOfReplicas;
 
     private final Map<String, List<PropertyDefinition>> propertiesByName;
     private final List<PropertyDefinition> dynamicBoostProperties;
@@ -99,6 +107,8 @@ public class ElasticIndexDefinition exte
         this.bulkFlushIntervalMs = getOptionalValue(defn, BULK_FLUSH_INTERVAL_MS, BULK_FLUSH_INTERVAL_MS_DEFAULT);
         this.bulkRetries = getOptionalValue(defn, BULK_RETRIES, BULK_RETRIES_DEFAULT);
         this.bulkRetriesBackoff = getOptionalValue(defn, BULK_RETRIES_BACKOFF, BULK_RETRIES_BACKOFF_DEFAULT);
+        this.numberOfShards = getOptionalValue(defn, NUMBER_OF_SHARDS, NUMBER_OF_SHARDS_DEFAULT);
+        this.numberOfReplicas = getOptionalValue(defn, NUMBER_OF_REPLICAS, NUMBER_OF_REPLICAS_DEFAULT);
 
         this.propertiesByName = getDefinedRules()
                 .stream()

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java?rev=1883383&r1=1883382&r2=1883383&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java Fri Nov 13 11:06:10 2020
@@ -63,6 +63,8 @@ class ElasticIndexHelper {
     private static XContentBuilder loadSettings(ElasticIndexDefinition indexDefinition) throws IOException {
         final XContentBuilder settingsBuilder = XContentFactory.jsonBuilder();
         settingsBuilder.startObject();
+        settingsBuilder.field("number_of_shards", indexDefinition.numberOfShards);
+        settingsBuilder.field("number_of_replicas", indexDefinition.numberOfReplicas);
         {
             settingsBuilder.startObject("analysis");
             {

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java?rev=1883383&r1=1883382&r2=1883383&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java Fri Nov 13 11:06:10 2020
@@ -82,7 +82,9 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.BiConsumer;
 import java.util.function.BiPredicate;
+import java.util.function.Consumer;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
@@ -370,10 +372,13 @@ public class ElasticRequestHandler {
        be useful in every situation based on the type of content)
      */
     private MoreLikeThisQueryBuilder mltQuery(Map<String, String> mltParams) {
-        String text = mltParams.get(MoreLikeThisHelperUtil.MLT_STREAM_BODY);
+        // creates a shallow copy of mltParams so we can remove the entries to
+        // improve validation without changing the original structure
+        Map<String, String> shallowMltParams = new HashMap<>(mltParams);
+        String text = shallowMltParams.remove(MoreLikeThisHelperUtil.MLT_STREAM_BODY);
 
         MoreLikeThisQueryBuilder mlt;
-        String fields = mltParams.get(MoreLikeThisHelperUtil.MLT_FILED);
+        String fields = shallowMltParams.remove(MoreLikeThisHelperUtil.MLT_FILED);
         // It's expected the text here to be the path of the doc
         // In case the path of a node is greater than 512 bytes,
         // we hash it before storing it as the _id for the elastic doc
@@ -390,30 +395,30 @@ public class ElasticRequestHandler {
             mlt = moreLikeThisQuery(fieldsArray, null, new Item[]{new Item(null, text)});
         }
 
-        for (String key : mltParams.keySet()) {
-            String val = mltParams.get(key);
-            if (MoreLikeThisHelperUtil.MLT_MIN_DOC_FREQ.equals(key)) {
-                mlt.minDocFreq(Integer.parseInt(val));
-            } else if (MoreLikeThisHelperUtil.MLT_MIN_TERM_FREQ.equals(key)) {
-                mlt.minTermFreq(Integer.parseInt(val));
-            } else if (MoreLikeThisHelperUtil.MLT_BOOST_FACTOR.equals(key)) {
-                mlt.boost(Float.parseFloat(val));
-            } else if (MoreLikeThisHelperUtil.MLT_MAX_DOC_FREQ.equals(key)) {
-                mlt.maxDocFreq(Integer.parseInt(val));
-            } else if (MoreLikeThisHelperUtil.MLT_MAX_QUERY_TERMS.equals(key)) {
-                mlt.maxQueryTerms(Integer.parseInt(val));
-            } else if (MoreLikeThisHelperUtil.MLT_MAX_WORD_LENGTH.equals(key)) {
-                mlt.maxWordLength(Integer.parseInt(val));
-            } else if (MoreLikeThisHelperUtil.MLT_MIN_WORD_LENGTH.equals(key)) {
-                mlt.minWordLength(Integer.parseInt(val));
-            } else if (MoreLikeThisHelperUtil.MLT_MIN_SHOULD_MATCH.equals(key)) {
-                mlt.minimumShouldMatch(val);
-            } else if (MoreLikeThisHelperUtil.MLT_STOP_WORDS.equals(key)) {
+        if (!shallowMltParams.isEmpty()) {
+            BiConsumer<String, Consumer<String>> mltParamSetter = (key, setter) -> {
+                String val = shallowMltParams.remove(key);
+                if (val != null) {
+                    setter.accept(val);
+                }
+            };
+
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_DOC_FREQ, (val) -> mlt.minDocFreq(Integer.parseInt(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_TERM_FREQ, (val) -> mlt.minTermFreq(Integer.parseInt(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_BOOST_FACTOR, (val) -> mlt.boost(Float.parseFloat(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MAX_DOC_FREQ, (val) -> mlt.maxDocFreq(Integer.parseInt(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MAX_QUERY_TERMS, (val) -> mlt.maxQueryTerms(Integer.parseInt(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MAX_WORD_LENGTH, (val) -> mlt.maxWordLength(Integer.parseInt(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_WORD_LENGTH, (val) -> mlt.minWordLength(Integer.parseInt(val)));
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_MIN_SHOULD_MATCH, mlt::minimumShouldMatch);
+            mltParamSetter.accept(MoreLikeThisHelperUtil.MLT_STOP_WORDS, (val) -> {
                 // TODO : Read this from a stopwords text file, configured via index defn maybe ?
                 String[] stopWords = val.split(",");
                 mlt.stopWords(stopWords);
-            } else {
-                LOG.warn("Unrecognized param {} in the mlt query {}", key, mltParams);
+            });
+
+            if (!shallowMltParams.isEmpty()) {
+                LOG.warn("mlt query contains unrecognized params {} that will be skipped", shallowMltParams);
             }
         }
 

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java?rev=1883383&r1=1883382&r2=1883383&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java Fri Nov 13 11:06:10 2020
@@ -130,7 +130,7 @@ public class ElasticSimilarQueryTest ext
         String nativeQueryStringWithStopWords = "select [jcr:path] from [nt:base] where " +
                 "native('elastic-sim', 'mlt?stream.body=/test/a&mlt.fl=:path&mlt.mindf=0&mlt.mintf=0&mlt.stopwords=Hello,bye')";
 
-        String nativeQueryStringWithouStopWords =  "select [jcr:path] from [nt:base] where " +
+        String nativeQueryStringWithoutStopWords =  "select [jcr:path] from [nt:base] where " +
                 "native('elastic-sim', 'mlt?stream.body=/test/a&mlt.fl=:path&mlt.mindf=0&mlt.mintf=0&mlt.minshouldmatch=20%')";
 
         Tree test = root.getTree("/").addChild("test");
@@ -147,7 +147,7 @@ public class ElasticSimilarQueryTest ext
         assertEventually(() -> assertQuery(nativeQueryStringWithStopWords,
                 Arrays.asList("/test/e", "/test/f")));
 
-        assertEventually(() -> assertQuery(nativeQueryStringWithouStopWords,
+        assertEventually(() -> assertQuery(nativeQueryStringWithoutStopWords,
                 Arrays.asList("/test/b", "/test/c", "/test/d", "/test/e", "/test/f")));
     }
 
@@ -168,7 +168,7 @@ public class ElasticSimilarQueryTest ext
         root.commit();
 
         // Matches because of term Hello should be ignored since wl <6 (so /test/ should NOT be in the match list)
-        // /test/d should be in match list (becuase of Worlds term)
+        // /test/d should be in match list (because of Worlds term)
         assertEventually(() -> assertQuery(nativeQueryStringWithMinWordLength,
                 Arrays.asList("/test/c", "/test/d")));
 
@@ -195,7 +195,7 @@ public class ElasticSimilarQueryTest ext
         test.addChild("h").setProperty("text", "Hello");
         root.commit();
 
-        String query = "select [jcr:path] from [nt:base] where similar(., '"+longPath.getPath()+"')";
+        String query = "select [jcr:path] from [nt:base] where similar(., '" + longPath.getPath() + "')";
 
         assertEventually(() -> assertQuery(query,
                 Arrays.asList("/test/b", "/test/c", "/test/d", "/test/f", "/test/g", "/test/h")));

Modified: jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexDescendantSpellcheckCommonTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexDescendantSpellcheckCommonTest.java?rev=1883383&r1=1883382&r2=1883383&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexDescendantSpellcheckCommonTest.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexDescendantSpellcheckCommonTest.java Fri Nov 13 11:06:10 2020
@@ -145,9 +145,9 @@ public abstract class IndexDescendantSpe
         return suggestions;
     }
 
-    private void validateSpellchecks(String query, Set<String> expected) throws Exception {
+    private void validateSpellchecks(String query, Set<String> expected) {
         assertEventually(() -> {
-            Set<String> suggestions = null;
+            Set<String> suggestions;
             try {
                 suggestions = getSpellchecks(query);
             } catch (Exception e) {
@@ -160,7 +160,7 @@ public abstract class IndexDescendantSpe
     @Ignore
     //TODO ES Failing
     @Test
-    public void noDescendantSuggestsAll() throws Exception {
+    public void noDescendantSuggestsAll() {
         validateSpellchecks(
                 createSpellcheckQuery(NT_OAK_UNSTRUCTURED, "taste", null),
                 newHashSet("test1", "test2", "test3", "test4", "test5", "test6"));
@@ -168,14 +168,14 @@ public abstract class IndexDescendantSpe
 
     //OAK-3994
     @Test
-    public void rootIndexWithDescendantConstraint() throws Exception {
+    public void rootIndexWithDescendantConstraint() {
         validateSpellchecks(
                 createSpellcheckQuery(NT_OAK_UNSTRUCTURED, "taste", "/content1"),
                 newHashSet("test2", "test3"));
     }
 
     @Ignore
-    //TODO ES Failing: if path restriction is not enabled, all ruggestions should be returned
+    //TODO ES Failing: if path restriction is not enabled, all suggestions should be returned
     //OAK-3994
     @Test
     public void descendantSuggestionRequirePathRestrictionIndex() throws Exception {
@@ -194,7 +194,7 @@ public abstract class IndexDescendantSpe
     //TODO ES Failing
     //OAK-3994
     @Test
-    public void unambiguousSubtreeIndexWithDescendantConstraint() throws Exception {
+    public void unambiguousSubtreeIndexWithDescendantConstraint() {
         validateSpellchecks(
                 createSpellcheckQuery(NT_BASE, "taste", "/content3"),
                 newHashSet("test5", "test6"));
@@ -202,7 +202,7 @@ public abstract class IndexDescendantSpe
 
     //OAK-3994
     @Test
-    public void unambiguousSubtreeIndexWithSubDescendantConstraint() throws Exception {
+    public void unambiguousSubtreeIndexWithSubDescendantConstraint() {
         validateSpellchecks(
                 createSpellcheckQuery(NT_BASE, "taste", "/content3/sC"),
                 newHashSet("test6"));