You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/09/01 20:33:17 UTC
[lucene-solr] 01/02: @678 Work on debugging
testRetryUpdatesWhenClusterStateIsStale.
This is an automated email from the ASF dual-hosted git repository.
markrmiller pushed a commit to branch reference_impl_dev
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit ac8acdcd9aba5d11877730fa65b7e2891414be60
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Tue Sep 1 15:23:42 2020 -0500
@678 Work on debugging testRetryUpdatesWhenClusterStateIsStale.
---
.../solr/cloud/ShardLeaderElectionContext.java | 15 +++++++-
.../solr/cloud/ShardLeaderElectionContextBase.java | 4 +-
.../java/org/apache/solr/cloud/ZkController.java | 20 +++++++---
.../src/java/org/apache/solr/core/SolrCore.java | 44 +++++++++++++---------
.../solrj/impl/CloudHttp2SolrClientTest.java | 15 +++-----
.../solrj/impl/CloudSolrClientCacheTest.java | 1 +
6 files changed, 63 insertions(+), 36 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
index 2f564f9..f195e8c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
@@ -270,12 +270,15 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}
}
if (!success) {
+ if (isClosed()) {
+ return;
+ }
rejoinLeaderElection(core);
return;
}
}
- if (!isClosed) {
+ if (!isClosed()) {
try {
if (replicaType == Replica.Type.TLOG) {
// stop replicate from old leader
@@ -298,6 +301,10 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
"without being up-to-date with the previous leader", coreNodeName);
zkController.getShardTerms(collection, shardId).setTermEqualsToLeader(coreNodeName);
}
+ if (isClosed()) {
+ return;
+ }
+
super.runLeaderProcess(context, weAreReplacement, 0);
assert shardId != null;
@@ -316,6 +323,9 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
try (SolrCore core = cc.getCore(coreName)) {
if (core != null) {
+ if (isClosed()) {
+ return;
+ }
core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
publishActiveIfRegisteredAndNotActive(core);
} else {
@@ -347,6 +357,9 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
// we could not publish ourselves as leader - try and rejoin election
try {
+ if (isClosed()) {
+ return;
+ }
rejoinLeaderElection(core);
} catch (Exception exc) {
ParWork.propegateInterrupt(e);
diff --git a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
index 57a888a..c8138f8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContextBase.java
@@ -83,7 +83,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
log.debug("Removing leader registration node on cancel: {} {}", leaderPath, version);
List<Op> ops = new ArrayList<>(2);
ops.add(Op.check(Paths.get(leaderPath).getParent().toString(), version));
- ops.add(Op.check(electionPath, -1));
+ ops.add(Op.check(leaderSeqPath, -1));
ops.add(Op.delete(leaderPath, -1));
zkClient.multi(ops);
} catch (KeeperException e) {
@@ -110,8 +110,6 @@ class ShardLeaderElectionContextBase extends ElectionContext {
return;
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Exception canceling election", e);
- } finally {
- version = null;
}
} else {
log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c8480c5..67d45b1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -149,6 +149,7 @@ public class ZkController implements Closeable {
public static final String CLUSTER_SHUTDOWN = "/cluster/shutdown";
static final int WAIT_DOWN_STATES_TIMEOUT_SECONDS = 60;
+ public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
public final int WAIT_FOR_STATE = Integer.getInteger("solr.waitForState", 10);
private final boolean SKIP_AUTO_RECOVERY = Boolean.getBoolean("solrcloud.skip.autorecovery");
@@ -384,7 +385,16 @@ public class ZkController implements Closeable {
ContextKey contextKey = new ContextKey(collection, coreNodeName);
ElectionContext context = electionContexts.get(contextKey);
if (context != null) {
- context.close();
+ try {
+ context.cancelElection();
+ } catch (InterruptedException e) {
+ ParWork.propegateInterrupt(e);
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ } catch (KeeperException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ } finally {
+ context.close();
+ }
}
}
@@ -403,12 +413,12 @@ public class ZkController implements Closeable {
public synchronized void command() {
try (ParWork worker = new ParWork("disconnected", true)) {
- worker.collect(overseerContexts);
- worker.collect( ZkController.this.overseer);
+ worker.collect("OverseerElectionContexts", overseerContexts.values());
+ worker.collect("Overseer", ZkController.this.overseer);
worker.collect("", () -> {
clearZkCollectionTerms();
});
- worker.collect(electionContexts.values());
+ worker.collect("electionContexts", electionContexts.values());
worker.collect("",() -> {
markAllAsNotLeader(descriptorsSupplier);
});
@@ -2317,7 +2327,7 @@ public class ZkController implements Closeable {
*/
public boolean claimAsyncId(String asyncId) throws KeeperException {
try {
- return asyncIdsMap.putIfAbsent(asyncId, new byte[0]);
+ return asyncIdsMap.putIfAbsent(asyncId, EMPTY_BYTE_ARRAY);
} catch (InterruptedException e) {
ParWork.propegateInterrupt(e);
throw new RuntimeException(e);
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index 6e3fa0a..8978bca 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -3069,24 +3069,7 @@ public final class SolrCore implements SolrInfoBean, Closeable {
}
}
if (deleteInstanceDir) {
- addCloseHook(new CloseHook() {
- @Override
- public void preClose(SolrCore core) {
- // empty block
- }
-
- @Override
- public void postClose(SolrCore core) {
- if (desc != null) {
- try {
- FileUtils.deleteDirectory(desc.getInstanceDir().toFile());
- } catch (IOException e) {
- SolrException.log(log, "Failed to delete instance dir for core:"
- + core.getName() + " dir:" + desc.getInstanceDir());
- }
- }
- }
- });
+ addCloseHook(new SolrCoreDeleteCloseHook(desc));
}
}
@@ -3332,4 +3315,29 @@ public final class SolrCore implements SolrInfoBean, Closeable {
public void runAsync(Runnable r) {
ParWork.getMyPerThreadExecutor().submit(r);
}
+
+ private static class SolrCoreDeleteCloseHook extends CloseHook {
+ private final CoreDescriptor desc;
+
+ public SolrCoreDeleteCloseHook(CoreDescriptor desc) {
+ this.desc = desc;
+ }
+
+ @Override
+ public void preClose(SolrCore core) {
+ // empty block
+ }
+
+ @Override
+ public void postClose(SolrCore core) {
+ if (desc != null) {
+ try {
+ FileUtils.deleteDirectory(desc.getInstanceDir().toFile());
+ } catch (IOException e) {
+ SolrException.log(log, "Failed to delete instance dir for core:"
+ + core.getName() + " dir:" + desc.getInstanceDir());
+ }
+ }
+ }
+ }
}
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java
index a34e2c0..3ec2dea 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudHttp2SolrClientTest.java
@@ -436,7 +436,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
CollectionAdminRequest.createCollection(collectionName, "conf", liveNodes, liveNodes)
.setMaxShardsPerNode(liveNodes * liveNodes)
.process(cluster.getSolrClient());
- cluster.waitForActiveCollection(collectionName, liveNodes, liveNodes * liveNodes);
// Add some new documents
new UpdateRequest()
.add(id, "0", "a_t", "hello1")
@@ -517,7 +516,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
CollectionAdminRequest.createCollection(collectionName, "conf", 1, liveNodes/3, liveNodes/3, liveNodes/3)
.setMaxShardsPerNode(liveNodes)
.process(cluster.getSolrClient());
- cluster.waitForActiveCollection(collectionName, 1, liveNodes);
// Add some new documents
new UpdateRequest()
@@ -611,7 +609,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
try (CloudSolrClient client = SolrTestCaseJ4.getCloudSolrClient(cluster.getZkServer().getZkAddress())) {
// important to have one replica on each node
CollectionAdminRequest.createCollection("foo", "conf", 1, NODE_COUNT).process(client);
- cluster.waitForActiveCollection("foo", 1, NODE_COUNT);
client.setDefaultCollection("foo");
Map<String, String> adminPathToMbean = new HashMap<>(CommonParams.ADMIN_PATHS.size());
@@ -666,8 +663,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
CollectionAdminRequest.waitForAsyncRequest(async1, client, TIMEOUT);
CollectionAdminRequest.waitForAsyncRequest(async2, client, TIMEOUT);
- cluster.waitForActiveCollection("multicollection1", 2, 2);
- cluster.waitForActiveCollection("multicollection2", 2, 2);
client.setDefaultCollection("multicollection1");
List<SolrInputDocument> docs = new ArrayList<>(3);
@@ -812,7 +807,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
@Ignore // nocommit ~ possible regression? response doesn't contain "adds"?
public void testVersionsAreReturned() throws Exception {
CollectionAdminRequest.createCollection("versions_collection", "conf", 2, 1).process(cluster.getSolrClient());
- cluster.waitForActiveCollection("versions_collection", 2, 2);
// assert that "adds" are returned
UpdateRequest updateRequest = new UpdateRequest()
@@ -898,7 +892,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
CollectionAdminRequest.createCollection(COL, "conf", 1, 1)
.setCreateNodeSet(old_leader_node.getNodeName())
.process(cluster.getSolrClient()).getStatus());
- cluster.waitForActiveCollection(COL, 1, 1);
// determine the coreNodeName of only current replica
Collection<Slice> slices = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COL).getSlices();
@@ -926,6 +919,8 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
.setNode(new_leader_node.getNodeName())
// NOTE: don't use our stale_client for this -- don't tip it off of a collection change
.process(cluster.getSolrClient()).getStatus());
+
+ cluster.waitForActiveCollection(COL, 1, 2);
// ...and delete our original leader.
assertEquals("Couldn't create collection", 0,
@@ -933,7 +928,10 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
// NOTE: don't use our stale_client for this -- don't tip it off of a collection change
.process(cluster.getSolrClient()).getStatus());
- // stale_client's collection state cache should now only point at a leader that no longer exists.
+// Thread.currentThread().sleep(3000);
+//
+// cluster.getZkClient().printLayout();
+ cluster.waitForActiveCollection(COL, 1, 1);
// attempt a (direct) update that should succeed in spite of cached cluster state
// pointing solely to a node that's no longer part of our collection...
@@ -974,7 +972,6 @@ public class CloudHttp2SolrClientTest extends SolrCloudTestCase {
CollectionAdminRequest.createCollection(collectionName, "conf", liveNodes, 1, 1, pullReplicas)
.setMaxShardsPerNode(liveNodes)
.process(cluster.getSolrClient());
- cluster.waitForActiveCollection(collectionName, liveNodes, liveNodes * (2 + pullReplicas));
// Add some new documents
new UpdateRequest()
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
index 08a535b..b647905 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
@@ -64,6 +64,7 @@ public class CloudSolrClientCacheTest extends SolrTestCaseJ4 {
this.c = c;
}
+
@Override
public boolean isLazilyLoaded() {
return true;