You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by kr...@apache.org on 2023/09/29 18:02:10 UTC
[solr] branch main updated: SOLR-17004: ZkStateReader waitForState should check clusterState before using watchers (#1945)
This is an automated email from the ASF dual-hosted git repository.
krisden pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 240ae14962a SOLR-17004: ZkStateReader waitForState should check clusterState before using watchers (#1945)
240ae14962a is described below
commit 240ae14962a62192fedaea48d07590dd15ff1891
Author: Kevin Risden <ri...@users.noreply.github.com>
AuthorDate: Fri Sep 29 14:02:04 2023 -0400
SOLR-17004: ZkStateReader waitForState should check clusterState before using watchers (#1945)
---
solr/CHANGES.txt | 3 +-
.../solr/cloud/LeaderElectionIntegrationTest.java | 10 ++++++-
.../apache/solr/common/cloud/ZkStateReader.java | 33 ++++++++++++++++++----
3 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index cc7d9a46059..df10be97fc9 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -21,7 +21,8 @@ Improvements
Optimizations
---------------------
-(No changes)
+
+* SOLR-17004: ZkStateReader waitForState should check clusterState before using watchers (Kevin Risden)
Bug Fixes
---------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
index 4491476f030..5da2f862ea2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
@@ -79,15 +79,22 @@ public class LeaderElectionIntegrationTest extends SolrCloudTestCase {
.getCoreDescriptor()
.getCloudDescriptor()
.getShardId());
+ String jettyNodeName = jetty.getNodeName(); // must get before shutdown
jetty.stop();
stoppedRunners.add(jetty);
+ waitForState(
+ "Leader should not be " + jettyNodeName,
+ collection,
+ (n, c) ->
+ c.getLeader("shard1") != null
+ && !jettyNodeName.equals(c.getLeader("shard1").getNodeName()));
}
for (JettySolrRunner runner : stoppedRunners) {
runner.start();
}
waitForState(
- "Expected to see nodes come back " + collection, collection, (n, c) -> n.size() == 6);
+ "Expected to see nodes come back for " + collection, collection, (n, c) -> n.size() == 6);
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
// testLeaderElectionAfterClientTimeout
@@ -99,6 +106,7 @@ public class LeaderElectionIntegrationTest extends SolrCloudTestCase {
// timeout the leader
String leader = getLeader(collection);
JettySolrRunner jetty = getRunner(leader);
+ assertNotNull(jetty);
cluster.expireZkSession(jetty);
for (int i = 0; i < 60; i++) { // wait till leader is changed
diff --git a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index 17f7bdbc5d5..4879733e7fe 100644
--- a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -936,7 +936,6 @@ public class ZkStateReader implements SolrCloseable {
/** Get shard leader properties, with retry if none exist. */
public Replica getLeaderRetry(String collection, String shard, int timeout)
throws InterruptedException {
- AtomicReference<DocCollection> coll = new AtomicReference<>();
AtomicReference<Replica> leader = new AtomicReference<>();
try {
waitForState(
@@ -945,7 +944,6 @@ public class ZkStateReader implements SolrCloseable {
TimeUnit.MILLISECONDS,
(n, c) -> {
if (c == null) return false;
- coll.set(c);
Replica l = getLeader(n, c, shard);
if (l != null) {
log.debug("leader found for {}/{} to be {}", collection, shard, l);
@@ -1802,6 +1800,18 @@ public class ZkStateReader implements SolrCloseable {
throw new AlreadyClosedException();
}
+ // Check predicate against known clusterState before trying to add watchers
+ if (clusterState != null) {
+ Set<String> liveNodes = clusterState.getLiveNodes();
+ DocCollection docCollection = clusterState.getCollectionOrNull(collection);
+ if (liveNodes != null && docCollection != null) {
+ if (predicate.matches(liveNodes, docCollection)) {
+ log.debug("Found {} directly in clusterState", predicate);
+ return;
+ }
+ }
+ }
+
final CountDownLatch latch = new CountDownLatch(1);
waitLatches.add(latch);
AtomicReference<DocCollection> docCollection = new AtomicReference<>();
@@ -1855,12 +1865,23 @@ public class ZkStateReader implements SolrCloseable {
throw new AlreadyClosedException();
}
+ // Check predicate against known clusterState before trying to add watchers
+ if (clusterState != null) {
+ DocCollection docCollection = clusterState.getCollectionOrNull(collection);
+ if (docCollection != null) {
+ if (predicate.test(docCollection)) {
+ log.debug("Found {} directly in clusterState", predicate);
+ return docCollection;
+ }
+ }
+ }
+
final CountDownLatch latch = new CountDownLatch(1);
waitLatches.add(latch);
- AtomicReference<DocCollection> docCollection = new AtomicReference<>();
+ AtomicReference<DocCollection> docCollectionReference = new AtomicReference<>();
DocCollectionWatcher watcher =
(c) -> {
- docCollection.set(c);
+ docCollectionReference.set(c);
boolean matches = predicate.test(c);
if (matches) latch.countDown();
@@ -1875,8 +1896,8 @@ public class ZkStateReader implements SolrCloseable {
"Timeout waiting to see state for collection="
+ collection
+ " :"
- + docCollection.get());
- return docCollection.get();
+ + docCollectionReference.get());
+ return docCollectionReference.get();
} finally {
removeDocCollectionWatcher(collection, watcher);
waitLatches.remove(latch);