You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2019/08/12 20:00:55 UTC

[lucene-solr] branch master updated: SOLR-13399: Adding splitByPrefix param to IndexSizeTrigger; some splitByPrefix test and code cleanup

This is an automated email from the ASF dual-hosted git repository.

yonik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 768ca7c  SOLR-13399: Adding splitByPrefix param to IndexSizeTrigger; some splitByPrefix test and code cleanup
768ca7c is described below

commit 768ca7c5a7fe5fcd89f047275319eafe783eb96c
Author: Megan Carey <mc...@berkeley.edu>
AuthorDate: Fri Aug 9 14:47:14 2019 -0700

    SOLR-13399: Adding splitByPrefix param to IndexSizeTrigger; some splitByPrefix test and code cleanup
---
 solr/CHANGES.txt                                   |  2 +-
 .../solr/cloud/autoscaling/IndexSizeTrigger.java   | 28 +++++++++---
 .../org/apache/solr/handler/admin/SplitOp.java     | 14 ++----
 .../cloud/api/collections/SplitByPrefixTest.java   | 16 ++++---
 .../cloud/autoscaling/IndexSizeTriggerTest.java    | 53 +++++++++++++++++-----
 .../solr/handler/admin/SplitHandlerTest.java       |  6 ++-
 .../src/solrcloud-autoscaling-triggers.adoc        |  5 ++
 .../cloud/autoscaling/SplitShardSuggester.java     |  4 ++
 8 files changed, 91 insertions(+), 37 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d6320e5..5e421e3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -98,7 +98,7 @@ New Features
 
 * SOLR-13399: SPLITSHARD implements a new splitByPrefix option that takes into account the actual document distribution
   when using compositeIds.  Document distribution is calculated using the "id_prefix" field (if it exists) containing
-  just the compositeId prefixes, or directly from the indexed "id" field otherwise. (yonik)
+  just the compositeId prefixes, or directly from the indexed "id" field otherwise. (yonik, Megan Carey)
 
 * SOLR-13565: Node level runtime libs loaded from remote urls  (noble)
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
index 33eb1d7..f32669c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
@@ -30,6 +30,7 @@ import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.Locale;
 
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
@@ -69,6 +70,7 @@ public class IndexSizeTrigger extends TriggerBase {
   public static final String MAX_OPS_PROP = "maxOps";
   public static final String SPLIT_FUZZ_PROP = CommonAdminParams.SPLIT_FUZZ;
   public static final String SPLIT_METHOD_PROP = CommonAdminParams.SPLIT_METHOD;
+  public static final String SPLIT_BY_PREFIX = CommonAdminParams.SPLIT_BY_PREFIX;
 
   public static final String BYTES_SIZE_PROP = "__bytes__";
   public static final String TOTAL_BYTES_SIZE_PROP = "__total_bytes__";
@@ -86,6 +88,7 @@ public class IndexSizeTrigger extends TriggerBase {
   private long aboveBytes, aboveDocs, belowBytes, belowDocs;
   private int maxOps;
   private SolrIndexSplitter.SplitMethod splitMethod;
+  private boolean splitByPrefix;
   private float splitFuzz;
   private CollectionParams.CollectionAction aboveOp, belowOp;
   private final Set<String> collections = new HashSet<>();
@@ -96,7 +99,7 @@ public class IndexSizeTrigger extends TriggerBase {
     super(TriggerEventType.INDEXSIZE, name);
     TriggerUtils.validProperties(validProperties,
         ABOVE_BYTES_PROP, ABOVE_DOCS_PROP, BELOW_BYTES_PROP, BELOW_DOCS_PROP,
-        COLLECTIONS_PROP, MAX_OPS_PROP, SPLIT_METHOD_PROP, SPLIT_FUZZ_PROP);
+        COLLECTIONS_PROP, MAX_OPS_PROP, SPLIT_METHOD_PROP, SPLIT_FUZZ_PROP, SPLIT_BY_PREFIX);
   }
 
   @Override
@@ -176,11 +179,10 @@ public class IndexSizeTrigger extends TriggerBase {
     } catch (Exception e) {
       throw new TriggerValidationException(getName(), MAX_OPS_PROP, "invalid value: '" + maxOpsStr + "': " + e.getMessage());
     }
-    String methodStr = (String)properties.getOrDefault(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.LINK.toLower());
+    String methodStr = (String)properties.getOrDefault(SPLIT_METHOD_PROP, SolrIndexSplitter.SplitMethod.LINK.toLower());
     splitMethod = SolrIndexSplitter.SplitMethod.get(methodStr);
     if (splitMethod == null) {
-      throw new TriggerValidationException(getName(), SPLIT_METHOD_PROP, "Unknown value '" + CommonAdminParams.SPLIT_METHOD +
-          ": " + methodStr);
+      throw new TriggerValidationException(getName(), SPLIT_METHOD_PROP, "unrecognized value of: '" + methodStr + "'");
     }
     String fuzzStr = String.valueOf(properties.getOrDefault(SPLIT_FUZZ_PROP, 0.0f));
     try {
@@ -188,6 +190,19 @@ public class IndexSizeTrigger extends TriggerBase {
     } catch (Exception e) {
       throw new TriggerValidationException(getName(), SPLIT_FUZZ_PROP, "invalid value: '" + fuzzStr + "': " + e.getMessage());
     }
+    String splitByPrefixStr = String.valueOf(properties.getOrDefault(SPLIT_BY_PREFIX, false));
+    try {
+      splitByPrefix = getValidBool(splitByPrefixStr);
+    } catch (Exception e) {
+      throw new TriggerValidationException(getName(), SPLIT_BY_PREFIX, "invalid value: '" + splitByPrefixStr + "': " + e.getMessage());
+    }
+  }
+  
+  private boolean getValidBool(String str) throws Exception {
+    if (str != null && (str.toLowerCase(Locale.ROOT).equals("true") || str.toLowerCase(Locale.ROOT).equals("false"))) {
+      return Boolean.parseBoolean(str);
+    }
+    throw new IllegalArgumentException("Expected a valid boolean value but got " + str);
   }
 
   @Override
@@ -430,10 +445,11 @@ public class IndexSizeTrigger extends TriggerBase {
         TriggerEvent.Op op = new TriggerEvent.Op(aboveOp);
         op.addHint(Suggester.Hint.COLL_SHARD, new Pair<>(coll, r.getShard()));
         Map<String, Object> params = new HashMap<>();
-        params.put(CommonAdminParams.SPLIT_METHOD, splitMethod.toLower());
+        params.put(SPLIT_METHOD_PROP, splitMethod.toLower());
         if (splitFuzz > 0) {
-          params.put(CommonAdminParams.SPLIT_FUZZ, splitFuzz);
+          params.put(SPLIT_FUZZ_PROP, splitFuzz);
         }
+        params.put(SPLIT_BY_PREFIX, splitByPrefix);
         op.addHint(Suggester.Hint.PARAMS, params);
         ops.add(op);
         Long time = lastAboveEventMap.get(r.getCore());
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
index d280b11..584f183 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
@@ -30,6 +30,7 @@ import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkShardTerms;
 import org.apache.solr.common.SolrException;
@@ -385,16 +386,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
 
       // compare to current prefix bucket and see if this new term shares the same prefix
       if (term != null && term.length >= currPrefix.length && currPrefix.length > 0) {
-        int i = 0;
-        for (; i < currPrefix.length; i++) {
-          if (currPrefix.bytes[i] != term.bytes[term.offset + i]) {
-            break;
-          }
-        }
-
-        if (i == currPrefix.length) {
-          // prefix was the same (common-case fast path)
-          // int count = termsEnum.docFreq();
+        if (StringHelper.startsWith(term, currPrefix)) {
           bucketCount++;  // use 1 since we are dealing with unique ids
           continue;
         }
@@ -442,7 +434,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
       }
     }
 
-    log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={}numCollisions={}", idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
+    log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numCollisions={}", idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
 
     return counts.values();
   }
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
index ca2aefc..5ec53e5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
@@ -40,10 +40,10 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-//
-// This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
-// See SplitHandlerTest for random tests of lower-level split selection logic.
-//
+/** 
+ *  This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
+ *  See SplitHandlerTest for random tests of lower-level split selection logic.
+ */
 public class SplitByPrefixTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -90,7 +90,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
     }
   }
 
-  // find prefixes (shard keys) matching certain criteria
+  /**
+   * find prefixes (shard keys) matching certain criteria
+   */
   public static List<Prefix> findPrefixes(int numToFind, int lowerBound, int upperBound) {
     CompositeIdRouter router = new CompositeIdRouter();
 
@@ -115,7 +117,9 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
     return prefixes;
   }
 
-  // remove duplicate prefixes from the sorted prefix list
+  /**
+   * remove duplicate prefixes from the SORTED prefix list
+   */
   public static List<Prefix> removeDups(List<Prefix> prefixes) {
     ArrayList<Prefix> result = new ArrayList<>();
     Prefix last = null;
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
index 59c7d67..d54cf0b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
@@ -64,6 +64,7 @@ import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.metrics.SolrCoreMetricManager;
+import org.apache.solr.update.SolrIndexSplitter;
 import org.apache.solr.util.LogLevel;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -155,7 +156,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testTrigger() throws Exception {
     String collectionName = "testTrigger_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -228,7 +228,11 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
         }
         Map<String, Object> params = (Map<String, Object>)op.getHints().get(Suggester.Hint.PARAMS);
         assertNotNull("params are null: " + op, params);
-        assertEquals("splitMethod: " + op, "link", params.get(CommonAdminParams.SPLIT_METHOD));
+        
+        // verify default split configs
+        assertEquals("splitMethod: " + op, SolrIndexSplitter.SplitMethod.LINK.toLower(),
+            params.get(CommonAdminParams.SPLIT_METHOD));
+        assertEquals("splitByPrefix: " + op, false, params.get(CommonAdminParams.SPLIT_BY_PREFIX));
       }
       assertTrue("shard1 should be split", shard1);
       assertTrue("shard2 should be split", shard2);
@@ -261,7 +265,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testSplitIntegration() throws Exception {
     String collectionName = "testSplitIntegration_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -385,7 +388,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testMergeIntegration() throws Exception {
     String collectionName = "testMergeIntegration_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -500,8 +502,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 05-Jul-2018
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testMixedBounds() throws Exception {
     if (!realCluster) {
       log.info("This test doesn't work with a simulated cluster");
@@ -732,7 +732,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
     response = solrClient.request(req);
     assertEquals(response.get("result").toString(), "success");
-//    System.exit(-1);
 
     assertEquals(1, listenerEvents.size());
     events = listenerEvents.get("capturing4");
@@ -766,7 +765,6 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testMaxOps() throws Exception {
     String collectionName = "testMaxOps_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
@@ -908,9 +906,10 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     assertEquals("number of ops: " + ops, 3, ops.size());
   }
 
+  //test that split parameters can be overridden
   @Test
-  public void testSplitMethodConfig() throws Exception {
-    String collectionName = "testSplitMethod_collection";
+  public void testSplitConfig() throws Exception {
+    String collectionName = "testSplitConfig_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
         "conf", 2, 2).setMaxShardsPerNode(2);
     create.process(solrClient);
@@ -919,7 +918,9 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
 
     long waitForSeconds = 3 + random().nextInt(5);
     Map<String, Object> props = createTriggerProps(waitForSeconds);
-    props.put(CommonAdminParams.SPLIT_METHOD, "link");
+    props.put(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.REWRITE.toLower());
+    props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, true);
+    
     try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger6")) {
       trigger.configure(loader, cloudManager, props);
       trigger.init();
@@ -977,13 +978,41 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
         }
         Map<String, Object> params = (Map<String, Object>)op.getHints().get(Suggester.Hint.PARAMS);
         assertNotNull("params are null: " + op, params);
-        assertEquals("splitMethod: " + op, "link", params.get(CommonAdminParams.SPLIT_METHOD));
+        
+        // verify overrides for split config
+        assertEquals("splitMethod: " + op, SolrIndexSplitter.SplitMethod.REWRITE.toLower(),
+            params.get(CommonAdminParams.SPLIT_METHOD));
+        assertEquals("splitByPrefix: " + op, true, params.get(CommonAdminParams.SPLIT_BY_PREFIX));
       }
       assertTrue("shard1 should be split", shard1);
       assertTrue("shard2 should be split", shard2);
     }
 
   }
+  
+  //validates that trigger configuration will fail for invalid split configs
+  @Test
+  public void testInvalidSplitConfig() throws Exception {
+    long waitForSeconds = 3 + random().nextInt(5);
+    Map<String, Object> props = createTriggerProps(waitForSeconds);
+    props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, "hello");
+
+    try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger7")) {
+      trigger.configure(loader, cloudManager, props);
+      fail("Trigger configuration should have failed with invalid property.");
+    } catch (TriggerValidationException e) {
+      assertTrue(e.getDetails().containsKey(IndexSizeTrigger.SPLIT_BY_PREFIX));
+    }
+
+    props.put(IndexSizeTrigger.SPLIT_BY_PREFIX, true);
+    props.put(CommonAdminParams.SPLIT_METHOD, "hello");
+    try (IndexSizeTrigger trigger = new IndexSizeTrigger("index_size_trigger8")) {
+      trigger.configure(loader, cloudManager, props);
+      fail("Trigger configuration should have failed with invalid property.");
+    } catch (TriggerValidationException e) {
+      assertTrue(e.getDetails().containsKey(IndexSizeTrigger.SPLIT_METHOD_PROP));
+    }
+  }
 
 
   @Test
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java
index 02174b7..dcfc749 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/SplitHandlerTest.java
@@ -227,7 +227,7 @@ public class SplitHandlerTest extends SolrTestCaseJ4 {
   }
 
   @Test
-  public void testHistoramBuilding() throws Exception {
+  public void testHistogramBuilding() throws Exception {
     List<Prefix> prefixes = SplitByPrefixTest.findPrefixes(20, 0, 0x00ffffff);
     List<Prefix> uniquePrefixes = SplitByPrefixTest.removeDups(prefixes);
     assertTrue(prefixes.size() > uniquePrefixes.size());  // make sure we have some duplicates to test hash collisions
@@ -273,6 +273,10 @@ public class SplitHandlerTest extends SolrTestCaseJ4 {
   }
 
   private boolean eqCount(Collection<SplitOp.RangeCount> a, Collection<SplitOp.RangeCount> b) {
+    if (a.size() != b.size()) {
+      return false;
+    }
+    
     Iterator<SplitOp.RangeCount> it1 = a.iterator();
     Iterator<SplitOp.RangeCount> it2 = b.iterator();
     while (it1.hasNext()) {
diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
index 190381d..484f514 100644
--- a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
+++ b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
@@ -330,6 +330,11 @@ Non-zero values are useful for large indexes with aggressively growing size, as
 avalanches of split shard requests when the total size of the index
 reaches even multiples of the maximum shard size thresholds.
 
+`splitByPrefix`::
+A boolean value (default is false) that specifies whether the aboveOp shard split should try to
+calculate sub-shard hash ranges according to document prefixes, or do a traditional shard split (i.e.
+split the hash range into n sub-ranges).
+
 Events generated by this trigger contain additional details about the shards
 that exceeded thresholds and the types of violations (upper / lower bounds, bytes / docs metrics).
 
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java
index 8aafef8..a244a10 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/SplitShardSuggester.java
@@ -56,6 +56,10 @@ class SplitShardSuggester extends Suggester {
     if (splitMethod != null) {
       req.setSplitMethod(splitMethod);
     }
+    Boolean splitByPrefix = (Boolean)params.get(CommonAdminParams.SPLIT_BY_PREFIX);
+    if (splitByPrefix != null) {
+      req.setSplitByPrefix(splitByPrefix);
+    }
     return req;
   }
 }