You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/03/22 17:40:46 UTC
[1/5] lucene-solr:jira/SOLR-445: SOLR-445: remove nocommits related
to OOM trapping since SOLR-8539 has concluded that this isn't a thing the
java code actually needs to be defensive of
Repository: lucene-solr
Updated Branches:
refs/heads/jira/SOLR-445 21c0fe690 -> cc2cd23ca
SOLR-445: remove nocommits related to OOM trapping since SOLR-8539 has concluded that this isn't a thing the java code actually needs to be defensive of
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fe54da0b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fe54da0b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fe54da0b
Branch: refs/heads/jira/SOLR-445
Commit: fe54da0b58ed18a38f3dd436dd3f30fbee9acbbf
Parents: 21c0fe6
Author: Chris Hostetter <ho...@apache.org>
Authored: Mon Mar 21 10:53:43 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Mon Mar 21 10:54:19 2016 -0700
----------------------------------------------------------------------
.../solr/update/processor/TolerantUpdateProcessor.java | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fe54da0b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
index 9f9ff5e..79573c9 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
@@ -156,7 +156,7 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
super.processAdd(cmd);
- } catch (Throwable t) { // nocommit: OOM trap
+ } catch (Throwable t) {
firstErrTracker.caught(t);
if (isLeader || distribPhase.equals(DistribPhase.NONE)) {
@@ -188,7 +188,7 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
super.processDelete(cmd);
- } catch (Throwable t) { // nocommit: OOM trap
+ } catch (Throwable t) {
firstErrTracker.caught(t);
ToleratedUpdateError err = new ToleratedUpdateError(cmd.isDeleteById() ? CmdType.DELID : CmdType.DELQ,
@@ -214,7 +214,7 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
public void processMergeIndexes(MergeIndexesCommand cmd) throws IOException {
try {
super.processMergeIndexes(cmd);
- } catch (Throwable t) { // nocommit: OOM trap
+ } catch (Throwable t) {
// we're not tolerante of errors from this type of command, but we
// do need to track it so we can annotate it with any other errors we were allready tolerant of
firstErrTracker.caught(t);
@@ -226,7 +226,7 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
public void processCommit(CommitUpdateCommand cmd) throws IOException {
try {
super.processCommit(cmd);
- } catch (Throwable t) { // nocommit: OOM trap
+ } catch (Throwable t) {
// we're not tolerante of errors from this type of command, but we
// do need to track it so we can annotate it with any other errors we were allready tolerant of
firstErrTracker.caught(t);
@@ -238,7 +238,7 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
public void processRollback(RollbackUpdateCommand cmd) throws IOException {
try {
super.processRollback(cmd);
- } catch (Throwable t) { // nocommit: OOM trap
+ } catch (Throwable t) {
// we're not tolerante of errors from this type of command, but we
// do need to track it so we can annotate it with any other errors we were allready tolerant of
firstErrTracker.caught(t);
@@ -370,7 +370,7 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
* even if you are going to ignore it (for now). If you plan to rethrow the Exception, use
* {@link #throwFirst} instead.
*/
- public void caught(Throwable t) { // nocommit: switch to just Exception?
+ public void caught(Throwable t) {
assert null != t;
if (null == first) {
if (t instanceof SolrException) {
[5/5] lucene-solr:jira/SOLR-445: SOLR-445: cloud test & bug fix for
docs missing their uniqueKey field
Posted by ho...@apache.org.
SOLR-445: cloud test & bug fix for docs missing their uniqueKey field
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/cc2cd23c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/cc2cd23c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/cc2cd23c
Branch: refs/heads/jira/SOLR-445
Commit: cc2cd23ca2537324dc7e4afe6a29605bbf9f1cb8
Parents: b6be74f
Author: Chris Hostetter <ho...@apache.org>
Authored: Tue Mar 22 09:25:33 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Tue Mar 22 09:25:33 2016 -0700
----------------------------------------------------------------------
.../processor/TolerantUpdateProcessor.java | 4 +-
.../cloud/TestTolerantUpdateProcessorCloud.java | 91 ++++++++++++++++++++
2 files changed, 94 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cc2cd23c/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
index 79573c9..316a8d0 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessor.java
@@ -147,12 +147,14 @@ public class TolerantUpdateProcessor extends UpdateRequestProcessor {
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
- boolean isLeader = isLeader(cmd); // nocommit: is this needed? see below...
+ boolean isLeader = true; // set below during 'try' // nocommit: is this var really needed (see below)
BytesRef id = null;
try {
// force AddUpdateCommand to validate+cache the id before proceeding
id = cmd.getIndexedId();
+ // if the id is missing from doc, act like we're the leader, let downstream throw error
+ isLeader = (null == id) || isLeader(cmd); // nocommit: is this needed? see below...
super.processAdd(cmd);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cc2cd23c/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
index 48c81de..236213e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
@@ -493,6 +493,38 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
// clean slate
assertEquals(0, client.deleteByQuery("*:*").getStatus());
+
+ // many docs from diff shards, 1 from each shard should fail and 1 w/o uniqueKey
+
+ rsp = update(params("update.chain", "tolerant-chain-max-errors-10",
+ "commit", "true"),
+ doc(f("id", S_ONE_PRE + "11")),
+ doc(f("id", S_TWO_PRE + "21")),
+ doc(f("id", S_ONE_PRE + "12")),
+ doc(f("id", S_TWO_PRE + "22"), f("foo_i", "bogus_val")),
+ doc(f("id", S_ONE_PRE + "13")),
+ doc(f("id", S_TWO_PRE + "23")),
+ doc(f("foo_i", "42")), // no "id"
+ doc(f("id", S_ONE_PRE + "14")),
+ doc(f("id", S_TWO_PRE + "24")),
+ doc(f("id", S_ONE_PRE + "15"), f("foo_i", "bogus_val")),
+ doc(f("id", S_TWO_PRE + "25")),
+ doc(f("id", S_ONE_PRE + "16")),
+ doc(f("id", S_TWO_PRE + "26"))).process(client);
+
+ assertEquals(0, rsp.getStatus());
+ assertUpdateTolerantAddErrors("many docs, 1 from each shard (+ no id) should fail", rsp,
+ S_ONE_PRE + "15",
+ "(unknown)",
+ S_TWO_PRE + "22");
+ assertQueryDocIds(client, false, S_TWO_PRE + "22", S_ONE_PRE + "15");
+ assertQueryDocIds(client, true,
+ S_ONE_PRE + "11", S_TWO_PRE + "21", S_ONE_PRE + "12",
+ S_ONE_PRE + "13", S_TWO_PRE + "23", S_ONE_PRE + "14", S_TWO_PRE + "24",
+ S_TWO_PRE + "25", S_ONE_PRE + "16", S_TWO_PRE + "26");
+
+ // clean slate
+ assertEquals(0, client.deleteByQuery("*:*").getStatus());
// many docs from diff shards, more then 10 (total) should fail
@@ -652,6 +684,65 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
// , S_ONE_PRE + "x", S_TWO_PRE + "x", // skipped
);
+ // clean slate
+ assertEquals(0, client.deleteByQuery("*:*").getStatus());
+
+ // many docs from diff shards, more then 10 don't have any uniqueKey specified
+
+ try {
+ ArrayList<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(30);
+ docs.add(doc(f("id", S_ONE_PRE + "z")));
+ docs.add(doc(f("id", S_TWO_PRE + "z")));
+ docs.add(doc(f("id", S_ONE_PRE + "y")));
+ docs.add(doc(f("id", S_TWO_PRE + "y")));
+ for (int i = 0; i < 11; i++) {
+ // no "id" field
+ docs.add(doc(f("foo_i", "" + i)));
+ }
+ docs.add(doc(f("id", S_ONE_PRE + "x"))); // may be skipped, more then 10 fails
+ docs.add(doc(f("id", S_TWO_PRE + "x"))); // may be skipped, more then 10 fails
+
+ rsp = update(params("update.chain", "tolerant-chain-max-errors-10",
+ "commit", "true"),
+ docs.toArray(new SolrInputDocument[docs.size()])).process(client);
+
+ fail("did not get a top level exception when more then 10 docs mising uniqueKey: " + rsp.toString());
+ } catch (SolrException e) {
+ // we can't make any reliable assertions about the error message, because
+ // it varies based on how the request was routed -- see SOLR-8830
+ assertEquals("not the type of error we were expecting ("+e.code()+"): " + e.toString(),
+ // NOTE: we always expect a 400 because we know that's what we would get from these types of errors
+ // on a single node setup -- a 5xx type error isn't something we should have triggered
+ 400, e.code());
+
+ // verify that the Exceptions metadata can tell us what failed.
+ NamedList<String> remoteErrMetadata = e.getMetadata();
+ assertNotNull("no metadata in: " + e.toString(), remoteErrMetadata);
+ int actualKnownErrsCount = 0;
+ for (int i = 0; i < remoteErrMetadata.size(); i++) {
+ ToleratedUpdateError err =
+ ToleratedUpdateError.parseMetadataIfToleratedUpdateError(remoteErrMetadata.getName(i),
+ remoteErrMetadata.getVal(i));
+ if (null == err) {
+ // some metadata unrelated to this update processor
+ continue;
+ }
+ actualKnownErrsCount++;
+ assertEquals("only expected type of error is ADD: " + err,
+ CmdType.ADD, err.getType());
+ assertTrue("failed id didn't match 'unknown': " + err,
+ err.getId().contains("unknown"));
+ }
+ assertEquals("wrong number of errors in metadata: " + remoteErrMetadata.toString(),
+ 11, actualKnownErrsCount);
+ }
+ assertEquals(0, client.commit().getStatus()); // need to force since update didn't finish
+ assertQueryDocIds(client, true
+ , S_ONE_PRE + "z", S_ONE_PRE + "y", S_TWO_PRE + "z", S_TWO_PRE + "y" // first
+ // // we can't assert for sure these docs were skipped or added
+ // // depending on shard we hit, they may have been added async before errors were exceeded
+ // , S_ONE_PRE + "x", S_TWO_PRE + "x" // skipped
+ );
}
//
[3/5] lucene-solr:jira/SOLR-445: SOLR-8881: replace nocommits with
doc note and link to jira
Posted by ho...@apache.org.
SOLR-8881: replace nocommits with doc note and link to jira
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c740e696
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c740e696
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c740e696
Branch: refs/heads/jira/SOLR-445
Commit: c740e69622f3c0295498f02e76e42af6341ba333
Parents: 5d93384
Author: Chris Hostetter <ho...@apache.org>
Authored: Mon Mar 21 16:36:58 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Mon Mar 21 16:36:58 2016 -0700
----------------------------------------------------------------------
.../update/processor/TolerantUpdateProcessorFactory.java | 7 +++++++
.../solr/cloud/TestTolerantUpdateProcessorCloud.java | 10 ----------
2 files changed, 7 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c740e696/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
index 35ca63b..e7f5653 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
@@ -69,6 +69,13 @@ import static org.apache.solr.update.processor.DistributingUpdateProcessorFactor
* curl http://localhost:8983/update?update.chain=tolerant-chain&maxErrors=100 -H "Content-Type: text/xml" -d @myfile.xml
* </pre>
*
+ * <p>
+ * <b>NOTE:</b> The behavior of this UpdateProcessorFactory in conjunction with indexing operations
+ * while a Shard Split is actively in progress is not well defined (or sufficiently tested). Users
+ * of this update processor are encouraged to either disable it, or pause updates, while any shard
+ * splitting is in progress (see <a href="https://issues.apache.org/jira/browse/SOLR-8881">SOLR-8881</a>
+ * for more details.)
+ * </p>
*/
public class TolerantUpdateProcessorFactory extends UpdateRequestProcessorFactory
implements SolrCoreAware, UpdateRequestProcessorFactory.RunAlways {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c740e696/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
index 41ff4af..3c105c4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
@@ -69,16 +69,6 @@ import org.slf4j.LoggerFactory;
* and assumes that the state of the cluster is healthy.
* </p>
*
- *
- * nocommit: what about shard splitting and "sub shard leaders" ? ...
- * (no idea if/how that affects things, but i notice lots of logic in DistributedUpdateProcessor along
- * the lines of "if (isLeader || isSubShardLeader)" and "if (!isLeader) { if (subShardLeader) {..."
- * which makes me worry that we may need explict testing of "tolerant" behavior when updates are routed
- * to subshards and then fail?
- *
- * nocommit: once these tests are passing reliably, we should also have a fully randomized sibling test...
- * - randomized # nodes, shards, replicas
- * - random updates contain rand # of docs with rand # failures to a random client
*/
public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
[2/5] lucene-solr:jira/SOLR-445: SOLR-445: fix exception msg when
CloudSolrClient does async updates that (cumulatively) exceed maxErrors
Posted by ho...@apache.org.
SOLR-445: fix exception msg when CloudSolrClient does async updates that (cumulatively) exceed maxErrors
I initially thought it would make sense to refactor DistributedUpdatesAsyncException into solr-common and re-use it here, but when i started down that path i realized it didn't make any sense since there aren't actual exceptions to wrap client side.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5d93384e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5d93384e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5d93384e
Branch: refs/heads/jira/SOLR-445
Commit: 5d93384e724b6f611270e212a4f9bd5b00c38e85
Parents: fe54da0
Author: Chris Hostetter <ho...@apache.org>
Authored: Mon Mar 21 14:36:12 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Mon Mar 21 14:36:12 2016 -0700
----------------------------------------------------------------------
.../solr/client/solrj/impl/CloudSolrClient.java | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d93384e/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
index 37cee8e..edfe1c3 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
@@ -781,14 +781,22 @@ public class CloudSolrClient extends SolrClient {
if (maxToleratedErrors < toleratedErrors.size()) {
// cumulative errors are too high, we need to throw a client exception w/correct metadata
- // nocommit: refactor & reuse DistributedUpdatesAsyncException
+ // NOTE: it shouldn't be possible for 1 == toleratedErrors.size(), because if that were the case
+ // then at least one shard should have thrown a real error before this, so we don't worry
+ // about having a more "singular" exception msg for that situation
+ StringBuilder msgBuf = new StringBuilder()
+ .append(toleratedErrors.size()).append(" Async failures during distributed update: ");
+
NamedList metadata = new NamedList<String>();
- SolrException toThrow = new SolrException(ErrorCode.BAD_REQUEST, "nocommit: better msg from DUAE");
- toThrow.setMetadata(metadata);
for (SimpleOrderedMap<String> err : toleratedErrors) {
ToleratedUpdateError te = ToleratedUpdateError.parseMap(err);
metadata.add(te.getMetadataKey(), te.getMetadataValue());
+
+ msgBuf.append("\n").append(te.getMessage());
}
+
+ SolrException toThrow = new SolrException(ErrorCode.BAD_REQUEST, msgBuf.toString());
+ toThrow.setMetadata(metadata);
throw toThrow;
}
}
[4/5] lucene-solr:jira/SOLR-445: SOLR-8862 work around. Maybe
something like this should be promoted into MiniSolrCloudCluster's start()
method? or SolrCloudTestCase's configureCluster?
Posted by ho...@apache.org.
SOLR-8862 work around. Maybe something like this should be promoted into MiniSolrCloudCluster's start() method? or SolrCloudTestCase's configureCluster?
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b6be74f2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b6be74f2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b6be74f2
Branch: refs/heads/jira/SOLR-445
Commit: b6be74f2182c46a10f861556ea81d3ed1a79a308
Parents: c740e69
Author: Chris Hostetter <ho...@apache.org>
Authored: Mon Mar 21 18:34:12 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Mon Mar 21 18:34:12 2016 -0700
----------------------------------------------------------------------
.../cloud/TestTolerantUpdateProcessorCloud.java | 35 ++++++++++++++++++++
.../TestTolerantUpdateProcessorRandomCloud.java | 4 +--
2 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6be74f2/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
index 3c105c4..48c81de 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
@@ -113,6 +113,7 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
configureCluster(NUM_SERVERS)
.addConfig(configName, configDir.toPath())
.configure();
+ assertSpinLoopAllJettyAreRunning(cluster);
Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put("config", "solrconfig-distrib-update-processor-chains.xml");
@@ -764,6 +765,40 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
}
+ /**
+ * HACK: Polls every Jetty instance in the specified MiniSolrCloudCluster until all of them report that they
+ * are running, sleeping in small increments between polls; the assertion fails after at most 6 polls.
+ *
+ * (work around for SOLR-8862. Maybe something like this should be promoted into MiniSolrCloudCluster's
+ * start() method? or SolrCloudTestCase's configureCluster?)
+ */
+ public static void assertSpinLoopAllJettyAreRunning(MiniSolrCloudCluster cluster) throws InterruptedException {
+ // NOTE: ideally we could use an ExecutorService that tried to open Sockets (with a long timeout)
+ // to each of the jetty instances in parallel w/o any sleeping -- but since they pick their ports
+ // dynamically and don't report them until/unless the server is up, that won't necessarily do us
+ // any good.
+ final int numServers = cluster.getJettySolrRunners().size();
+ int numRunning = 0;
+ for (int i = 5; 0 <= i; i--) {
+ numRunning = 0;
+ for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
+ if (jetty.isRunning()) {
+ numRunning++;
+ }
+ }
+ if (numServers == numRunning) {
+ return;
+ } else if (0 == i) {
+ // give up: this was the last poll, so skip the sleep and fall through to the failing assert below
+ break;
+ }
+ // the more nodes we're waiting on, the longer we should try to sleep (100ms per missing node, capped at 1s)
+ Thread.sleep(Math.min((numServers - numRunning) * 100, 1000));
+ }
+ assertEquals("giving up waiting for all jetty instances to be running",
+ numServers, numRunning);
+ }
+
/** Asserts that the UpdateResponse contains the specified expectedErrs and no others */
public static void assertUpdateTolerantErrors(String assertionMsgPrefix,
UpdateResponse response,
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6be74f2/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
index 536bb89..6e5daeb 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
@@ -104,8 +104,8 @@ public class TestTolerantUpdateProcessorRandomCloud extends SolrCloudTestCase {
configureCluster(numServers)
.addConfig(configName, configDir.toPath())
.configure();
-
- Thread.sleep(2000); // anoying attempt to work arround SOLR-8862 // nocommit ? ? ?
+
+ TestTolerantUpdateProcessorCloud.assertSpinLoopAllJettyAreRunning(cluster);
Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put("config", "solrconfig-distrib-update-processor-chains.xml");