You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2019/12/10 13:21:46 UTC
[lucene-solr] branch master updated: SOLR-13563: SPLITSHARD using
LINK method fails on disk usage checks.
This is an automated email from the ASF dual-hosted git repository.
ab pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new fed199d SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks.
fed199d is described below
commit fed199df7b3370b27f173d221e52c4c6983e8020
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Tue Dec 10 14:07:07 2019 +0100
SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks.
---
solr/CHANGES.txt | 2 ++
.../solr/cloud/api/collections/SplitShardCmd.java | 14 +++++++----
.../autoscaling/sim/SimClusterStateProvider.java | 8 +++++++
.../solr/cloud/autoscaling/sim/SimScenario.java | 27 ++++++++++++++++++----
.../cloud/autoscaling/sim/TestSimScenario.java | 25 ++++++++++++++++++++
5 files changed, 66 insertions(+), 10 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 4aa670a..a65dc8e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -221,6 +221,8 @@ Bug Fixes
* SOLR-13966: LatLonPointSpatialField wasn't working correctly with atomic/partial updates or RTG.
(Thomas Wöckinger via David Smiley)
+* SOLR-13563: SPLITSHARD using LINK method fails on disk usage checks. (Andrew Kettmann, ab)
+
Other Changes
---------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index 32329ff..e08c6e5 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -153,7 +153,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
}
RTimerTree t = timings.sub("checkDiskSpace");
- checkDiskSpace(collectionName, slice.get(), parentShardLeader);
+ checkDiskSpace(collectionName, slice.get(), parentShardLeader, splitMethod, ocmh.cloudManager);
t.stop();
// let's record the ephemeralOwner of the parent leader node
@@ -617,10 +617,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
throw new SolrException(ErrorCode.SERVER_ERROR, msgOnError);
}
}
- private void checkDiskSpace(String collection, String shard, Replica parentShardLeader) throws SolrException {
+
+ // public and static to facilitate reuse in the simulation framework and in tests
+ public static void checkDiskSpace(String collection, String shard, Replica parentShardLeader, SolrIndexSplitter.SplitMethod method, SolrCloudManager cloudManager) throws SolrException {
// check that enough disk space is available on the parent leader node
// otherwise the actual index splitting will always fail
- NodeStateProvider nodeStateProvider = ocmh.cloudManager.getNodeStateProvider();
+ NodeStateProvider nodeStateProvider = cloudManager.getNodeStateProvider();
Map<String, Object> nodeValues = nodeStateProvider.getNodeValues(parentShardLeader.getNodeName(),
Collections.singletonList(ImplicitSnitch.DISK));
Map<String, Map<String, List<ReplicaInfo>>> infos = nodeStateProvider.getReplicaInfo(parentShardLeader.getNodeName(),
@@ -648,9 +650,11 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
if (freeSize == null) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "missing node disk space information for parent shard leader");
}
- if (freeSize.doubleValue() < 2.0 * indexSize) {
+ // 100% more for REWRITE, 5% more for LINK
+ double neededSpace = method == SolrIndexSplitter.SplitMethod.REWRITE ? 2.0 * indexSize : 1.05 * indexSize;
+ if (freeSize.doubleValue() < neededSpace) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "not enough free disk space to perform index split on node " +
- parentShardLeader.getNodeName() + ", required: " + (2 * indexSize) + ", available: " + freeSize);
+ parentShardLeader.getNodeName() + ", required: " + neededSpace + ", available: " + freeSize);
}
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index bb8c654..bf5aef1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -96,6 +96,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrMetricManager;
+import org.apache.solr.update.SolrIndexSplitter;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1275,6 +1276,12 @@ public class SimClusterStateProvider implements ClusterStateProvider {
AtomicReference<String> sliceName = new AtomicReference<>();
sliceName.set(message.getStr(SHARD_ID_PROP));
String splitKey = message.getStr("split.key");
+ String methodStr = message.getStr(CommonAdminParams.SPLIT_METHOD, SolrIndexSplitter.SplitMethod.REWRITE.toLower());
+ SolrIndexSplitter.SplitMethod splitMethod = SolrIndexSplitter.SplitMethod.get(methodStr);
+ if (splitMethod == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown value '" + CommonAdminParams.SPLIT_METHOD +
+ ": " + methodStr);
+ }
ClusterState clusterState = getClusterState();
DocCollection collection = clusterState.getCollection(collectionName);
@@ -1285,6 +1292,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Shard " + collectionName +
" / " + sliceName.get() + " has no leader and can't be split");
}
+ SplitShardCmd.checkDiskSpace(collectionName, sliceName.get(), leader, splitMethod, cloudManager);
SplitShardCmd.lockForSplit(cloudManager, collectionName, sliceName.get());
// start counting buffered updates
Map<String, Object> props = sliceProperties.computeIfAbsent(collectionName, c -> new ConcurrentHashMap<>())
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java
index 187c66a..40e8716 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimScenario.java
@@ -788,7 +788,19 @@ public class SimScenario implements AutoCloseable {
Map<String, Object> values = new HashMap<>();
params.remove(Clause.NODESET);
for (String key : params.getParameterNames()) {
- values.put(key, params.get(key));
+ String strVal = params.get(key);
+ Object val;
+ // try auto-converting to a number
+ try {
+ val = Long.parseLong(strVal);
+ } catch (NumberFormatException nfe) {
+ try {
+ val = Double.parseDouble(strVal);
+ } catch (NumberFormatException nfe1) {
+ val = strVal;
+ }
+ }
+ values.put(key, val);
}
for (String node : nodes) {
scenario.cluster.getSimNodeStateProvider().simSetNodeValues(node, values);
@@ -812,11 +824,16 @@ public class SimScenario implements AutoCloseable {
for (String key : params.getParameterNames()) {
// try guessing if it's a number
try {
- Double d = Double.valueOf(params.get(key));
- values.put(key, d);
+ Integer i = Integer.valueOf(params.get(key));
+ values.put(key, i);
} catch (NumberFormatException nfe) {
- // not a number
- values.put(key, params.get(key));
+ try {
+ Double d = Double.valueOf(params.get(key));
+ values.put(key, d);
+ } catch (NumberFormatException nfe1) {
+ // not a number
+ values.put(key, params.get(key));
+ }
}
}
values.forEach((k, v) -> {
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java
index 6f00151..2a0e6c5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimScenario.java
@@ -138,4 +138,29 @@ public class TestSimScenario extends SimSolrCloudTestCase {
scenario.run();
}
}
+
+ String splitShardScenario =
+ "create_cluster numNodes=2\n" +
+ "solr_request /admin/collections?action=CREATE&name=testCollection&numShards=2&replicationFactor=2&maxShardsPerNode=5\n" +
+ "wait_collection collection=testCollection&shards=2&replicas=2\n" +
+ "set_shard_metrics collection=testCollection&shard=shard1&INDEX.sizeInBytes=1000000000\n" +
+ "set_node_metrics nodeset=#ANY&freedisk=1.5\n" +
+ "solr_request /admin/collection?action=SPLITSHARD&collection=testCollection&shard=shard1&splitMethod=${method}\n" +
+ "wait_collection collection=testCollection&shards=4&&withInactive=true&replicas=2&requireLeaders=true\n"
+ ;
+ @Test
+ public void testSplitShard() throws Exception {
+ try (SimScenario scenario = SimScenario.load(splitShardScenario)) {
+ scenario.context.put("method", "REWRITE");
+ scenario.run();
+ } catch (Exception e) {
+ assertTrue(e.toString(), e.toString().contains("not enough free disk"));
+ }
+ try (SimScenario scenario = SimScenario.load(splitShardScenario)) {
+ scenario.context.put("method", "LINK");
+ scenario.run();
+ } catch (Exception e) {
+ fail("should have succeeded with method LINK, but failed: " + e.toString());
+ }
+ }
}