Posted to commits@lucene.apache.org by ab...@apache.org on 2019/12/10 13:21:46 UTC

[lucene-solr] branch master updated: SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new fed199d  SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks.
fed199d is described below

commit fed199df7b3370b27f173d221e52c4c6983e8020
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Tue Dec 10 14:07:07 2019 +0100

    SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks.
---
 solr/CHANGES.txt                                   |  2 ++
 .../solr/cloud/api/collections/SplitShardCmd.java  | 14 +++++++----
 .../autoscaling/sim/SimClusterStateProvider.java   |  8 +++++++
 .../solr/cloud/autoscaling/sim/SimScenario.java    | 27 ++++++++++++++++++----
 .../cloud/autoscaling/sim/TestSimScenario.java     | 25 ++++++++++++++++++++
 5 files changed, 66 insertions(+), 10 deletions(-)
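
In short: before this change, SplitShardCmd.checkDiskSpace() always required free disk equal to twice the parent index size. That is appropriate for the rewrite split method, which copies documents into new sub-indexes, but far too strict for the link method, which hard-links existing segment files and needs only a small margin. A request such as /admin/collections?action=SPLITSHARD&collection=myColl&shard=shard1&splitMethod=link (collection and shard names here are placeholders) could therefore be rejected even though more than enough space was available. The diff below scales the check by split method and makes it reusable from the autoscaling simulator.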

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 4aa670a..a65dc8e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -221,6 +221,8 @@ Bug Fixes
 * SOLR-13966: LatLonPointSpatialField wasn't working correctly with atomic/partial updates or RTG.
   (Thomas Wöckinger via David Smiley)
 
+* SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks. (Andrew Kettmann, ab)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index 32329ff..e08c6e5 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -153,7 +153,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
     }
 
     RTimerTree t = timings.sub("checkDiskSpace");
-    checkDiskSpace(collectionName, slice.get(), parentShardLeader);
+    checkDiskSpace(collectionName, slice.get(), parentShardLeader, splitMethod, ocmh.cloudManager);
     t.stop();
 
     // let's record the ephemeralOwner of the parent leader node
@@ -617,10 +617,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       throw new SolrException(ErrorCode.SERVER_ERROR, msgOnError);
     }
   }
-  private void checkDiskSpace(String collection, String shard, Replica parentShardLeader) throws SolrException {
+
+  // public and static to facilitate reuse in the simulation framework and in tests
+  public static void checkDiskSpace(String collection, String shard, Replica parentShardLeader, SolrIndexSplitter.SplitMethod method, SolrCloudManager cloudManager) throws SolrException {
     // check that enough disk space is available on the parent leader node
     // otherwise the actual index splitting will always fail
-    NodeStateProvider nodeStateProvider = ocmh.cloudManager.getNodeStateProvider();
+    NodeStateProvider nodeStateProvider = cloudManager.getNodeStateProvider();
     Map<String, Object> nodeValues = nodeStateProvider.getNodeValues(parentShardLeader.getNodeName(),
         Collections.singletonList(ImplicitSnitch.DISK));
     Map<String, Map<String, List<ReplicaInfo>>> infos = nodeStateProvider.getReplicaInfo(parentShardLeader.getNodeName(),
@@ -648,9 +650,11 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
     if (freeSize == null) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "missing node disk space information for parent shard leader");
     }
-    if (freeSize.doubleValue() < 2.0 * indexSize) {
+    // 100% more for REWRITE, 5% more for LINK
+    double neededSpace = method == SolrIndexSplitter.SplitMethod.REWRITE ? 2.0 * indexSize : 1.05 * indexSize;
+    if (freeSize.doubleValue() < neededSpace) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "not enough free disk space to perform index split on node " +
-          parentShardLeader.getNodeName() + ", required: " + (2 * indexSize) + ", available: " + freeSize);
+          parentShardLeader.getNodeName() + ", required: " + neededSpace + ", available: " + freeSize);
     }
   }
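
The key change is the method-dependent headroom: rewrite needs room for roughly another full copy of the parent index, while link only needs about a 5% safety margin because segment files are hard-linked rather than copied. A minimal standalone sketch of the same arithmetic (illustration only, not the Solr code; the 2.0 and 1.05 factors come from the hunk above):

    // Hypothetical illustration of the headroom rule applied in checkDiskSpace().
    class SplitHeadroom {
      enum SplitMethod { REWRITE, LINK }

      static double neededSpaceGB(SplitMethod method, double indexSizeGB) {
        // 100% extra for REWRITE, 5% extra for LINK (hard links reuse segment files)
        return method == SplitMethod.REWRITE ? 2.0 * indexSizeGB : 1.05 * indexSizeGB;
      }

      public static void main(String[] args) {
        // a 100 GB parent shard: REWRITE requires 200 GB free, LINK only 105 GB
        System.out.println(neededSpaceGB(SplitMethod.REWRITE, 100.0)); // 200.0
        System.out.println(neededSpaceGB(SplitMethod.LINK, 100.0));    // 105.0
      }
    }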
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index bb8c654..bf5aef1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -96,6 +96,7 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrInfoBean;
 import org.apache.solr.metrics.SolrMetricManager;
+import org.apache.solr.update.SolrIndexSplitter;
 import org.apache.zookeeper.CreateMode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -1275,6 +1276,12 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     AtomicReference<String> sliceName = new AtomicReference<>();
     sliceName.set(message.getStr(SHARD_ID_PROP));
     String splitKey = message.getStr("split.key");
+    String methodStr = message.getStr(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.REWRITE.toLower());
+    SolrIndexSplitter.SplitMethod splitMethod = SolrIndexSplitter.SplitMethod.get(methodStr);
+    if (splitMethod == null) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown value of '" + CommonAdminParams.SPLIT_METHOD +
+          "': " + methodStr);
+    }
 
     ClusterState clusterState = getClusterState();
     DocCollection collection = clusterState.getCollection(collectionName);
@@ -1285,6 +1292,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Shard " + collectionName +
           " /  " + sliceName.get() + " has no leader and can't be split");
     }
+    SplitShardCmd.checkDiskSpace(collectionName, sliceName.get(), leader, splitMethod, cloudManager);
     SplitShardCmd.lockForSplit(cloudManager, collectionName, sliceName.get());
     // start counting buffered updates
     Map<String, Object> props = sliceProperties.computeIfAbsent(collectionName, c -> new ConcurrentHashMap<>())
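
Because checkDiskSpace() is now public and static, the simulated SPLITSHARD path above can run exactly the same validation as the real Overseer command before locking the shard for split. A rough sketch of how another caller could reuse it (a fragment for illustration; the collection and shard names are placeholders and error handling is omitted):

    // Hypothetical reuse of the now-public check; "testCollection"/"shard1" are placeholders.
    Replica leader = cloudManager.getClusterStateProvider().getClusterState()
        .getCollection("testCollection").getSlice("shard1").getLeader();
    SolrIndexSplitter.SplitMethod method = SolrIndexSplitter.SplitMethod.get("link"); // null if unknown
    SplitShardCmd.checkDiskSpace("testCollection", "shard1", leader, method, cloudManager);
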
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java
index 187c66a..40e8716 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java
@@ -788,7 +788,19 @@ public class SimScenario implements AutoCloseable {
       Map<String, Object> values = new HashMap<>();
       params.remove(Clause.NODESET);
       for (String key : params.getParameterNames()) {
-        values.put(key, params.get(key));
+        String strVal = params.get(key);
+        Object val;
+        // try auto-converting to a number
+        try {
+          val = Long.parseLong(strVal);
+        } catch (NumberFormatException nfe) {
+          try {
+            val = Double.parseDouble(strVal);
+          } catch (NumberFormatException nfe1) {
+            val = strVal;
+          }
+        }
+        values.put(key, val);
       }
       for (String node : nodes) {
         scenario.cluster.getSimNodeStateProvider().simSetNodeValues(node, values);
@@ -812,11 +824,16 @@ public class SimScenario implements AutoCloseable {
       for (String key : params.getParameterNames()) {
         // try guessing if it's a number
         try {
-          Double d = Double.valueOf(params.get(key));
-          values.put(key, d);
+          Integer i = Integer.valueOf(params.get(key));
+          values.put(key, i);
         } catch (NumberFormatException nfe) {
-          // not a number
-          values.put(key, params.get(key));
+          try {
+            Double d = Double.valueOf(params.get(key));
+            values.put(key, d);
+          } catch (NumberFormatException nfe1) {
+            // not a number
+            values.put(key, params.get(key));
+          }
         }
       }
       values.forEach((k, v) -> {
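
The stricter parsing above matters because metric values set from a scenario script arrive as strings: trying an integral type (Long or Integer, depending on the command) before Double keeps whole numbers such as INDEX.sizeInBytes=1000000000 integral instead of widening them to 1.0E9, while values like freedisk=1.5 still become doubles. A standalone sketch of the same try-in-order conversion (illustration only, not the SimScenario code itself):

    // Hypothetical helper mirroring the Long-then-Double order used by the node-metrics handler above.
    static Object autoConvert(String strVal) {
      try {
        return Long.parseLong(strVal);         // "1000000000" -> Long 1000000000
      } catch (NumberFormatException nfe) {
        try {
          return Double.parseDouble(strVal);   // "1.5" -> Double 1.5
        } catch (NumberFormatException nfe1) {
          return strVal;                       // anything else stays a String
        }
      }
    }
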
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java
index 6f00151..2a0e6c5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java
@@ -138,4 +138,29 @@ public class TestSimScenario extends SimSolrCloudTestCase {
       scenario.run();
     }
   }
+
+  String splitShardScenario =
+      "create_cluster numNodes=2\n" +
+          "solr_request /admin/collections?action=CREATE&name=testCollection&numShards=2&replicationFactor=2&maxShardsPerNode=5\n" +
+          "wait_collection collection=testCollection&shards=2&replicas=2\n" +
+          "set_shard_metrics collection=testCollection&shard=shard1&INDEX.sizeInBytes=1000000000\n" +
+          "set_node_metrics nodeset=#ANY&freedisk=1.5\n" +
+          "solr_request /admin/collection?action=SPLITSHARD&collection=testCollection&shard=shard1&splitMethod=${method}\n" +
+          "wait_collection collection=testCollection&shards=4&&withInactive=true&replicas=2&requireLeaders=true\n"
+      ;
+  @Test
+  public void testSplitShard() throws Exception {
+    try (SimScenario scenario = SimScenario.load(splitShardScenario)) {
+      scenario.context.put("method", "REWRITE");
+      scenario.run();
+    } catch (Exception e) {
+      assertTrue(e.toString(), e.toString().contains("not enough free disk"));
+    }
+    try (SimScenario scenario = SimScenario.load(splitShardScenario)) {
+      scenario.context.put("method", "LINK");
+      scenario.run();
+    } catch (Exception e) {
+      fail("should have succeeded with method LINK, but failed: " + e.toString());
+    }
+  }
 }
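
For reference on the new test: the scenario gives each node freedisk=1.5 (assuming, as with the real snitch, free disk is reported in GB) and a shard1 index of 1,000,000,000 bytes, roughly 0.93 GB. A rewrite split therefore needs about 1.86 GB and is expected to trip the "not enough free disk" check, while a link split needs only about 0.98 GB and should complete.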