You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2020/07/20 22:52:12 UTC
[lucene-solr] branch reference_impl updated: @257 - Fix some close,
start on cluster shutdown.
This is an automated email from the ASF dual-hosted git repository.
markrmiller pushed a commit to branch reference_impl
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/reference_impl by this push:
new 22ef26e @257 - Fix some close, start on cluster shutdown.
22ef26e is described below
commit 22ef26ef28f009e14d2d97a6f0188c128aef9230
Author: markrmiller@gmail.com <ma...@gmail.com>
AuthorDate: Mon Jul 20 17:50:10 2020 -0500
@257 - Fix some close, start on cluster shutdown.
---
.../client/solrj/embedded/JettySolrRunner.java | 5 +-
.../src/java/org/apache/solr/cloud/Overseer.java | 4 +-
.../core/src/java/org/apache/solr/cloud/ZkCLI.java | 2 +-
.../java/org/apache/solr/cloud/ZkController.java | 142 ++++++++++++++++++++-
.../TestCloudPhrasesIdentificationComponent.java | 4 -
.../solr/cloud/TestCloudPseudoReturnFields.java | 4 -
.../apache/solr/cloud/TestRandomFlRTGCloud.java | 4 -
.../TestTolerantUpdateProcessorRandomCloud.java | 3 -
.../src/java/org/apache/solr/common/ParWork.java | 3 +-
.../org/apache/solr/common/cloud/SolrZkClient.java | 5 +
.../apache/solr/common/cloud/ZkStateReader.java | 8 +-
.../org/apache/solr/common/util/CloseTracker.java | 30 +++++
.../solr/common/util/ObjectReleaseTracker.java | 4 +
.../apache/solr/cloud/MiniSolrCloudCluster.java | 113 ++++++++++++----
.../java/org/apache/solr/cloud/ZkTestServer.java | 21 ++-
15 files changed, 295 insertions(+), 57 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
index bc1f834..469d0d4 100644
--- a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
+++ b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
@@ -91,6 +91,7 @@ import org.eclipse.jetty.server.ServerConnector;
import org.eclipse.jetty.server.SessionIdManager;
import org.eclipse.jetty.server.SslConnectionFactory;
import org.eclipse.jetty.server.handler.HandlerWrapper;
+import org.eclipse.jetty.server.handler.ShutdownHandler;
import org.eclipse.jetty.server.handler.gzip.GzipHandler;
import org.eclipse.jetty.server.session.DefaultSessionIdManager;
import org.eclipse.jetty.server.session.HouseKeeper;
@@ -464,7 +465,9 @@ public class JettySolrRunner implements Closeable {
}
chain = injectJettyHandlers(chain);
-
+ ShutdownHandler shutdownHandler = new ShutdownHandler("solrrocks", true, true);
+ shutdownHandler.setHandler(chain);
+ chain = shutdownHandler;
if(config.enableV2) {
RewriteHandler rwh = new RewriteHandler();
rwh.setHandler(chain);
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index a925a27..a124a9d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -655,7 +655,9 @@ public class Overseer implements SolrCloseable {
updaterThread.start();
ccThread.start();
- triggerThread.start();
+ if (triggerThread != null) {
+ triggerThread.start();
+ }
systemCollectionCompatCheck(new BiConsumer<String, Object>() {
boolean firstPair = true;
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
index 0650735..8477129 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
@@ -197,7 +197,7 @@ public class ZkCLI implements CLIO {
SolrZkClient zkClient = null;
CoreContainer cc = null;
try {
- zkClient = new SolrZkClient(zkServerAddress, 30000, 30000,
+ zkClient = new SolrZkClient(zkServerAddress, 30000, 10000,
() -> {
}).start();
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 0b8ba59..63b4448 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -22,8 +22,12 @@ import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
+import java.net.HttpURLConnection;
import java.net.InetAddress;
+import java.net.MalformedURLException;
import java.net.NetworkInterface;
+import java.net.SocketException;
+import java.net.URL;
import java.net.URLEncoder;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
@@ -161,6 +165,9 @@ import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
public class ZkController implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public static final String CLUSTER_SHUTDOWN = "/cluster/shutdown";
+
static final int WAIT_DOWN_STATES_TIMEOUT_SECONDS = 60;
public final int WAIT_FOR_STATE = Integer.getInteger("solr.waitForState", 10);
@@ -339,7 +346,6 @@ public class ZkController implements Closeable {
if (cc == null) log.error("null corecontainer");
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
try {
- this.closeZkClient = true;
this.cc = cc;
this.descriptorsSupplier = descriptorsSupplier;
this.cloudConfig = cloudConfig;
@@ -930,9 +936,137 @@ public class ZkController implements Closeable {
}
private void init() {
- log.info("do init");
+ log.info("making shutdown watcher for cluster");
+ try {
+ zkClient.exists(CLUSTER_SHUTDOWN, new Watcher() {
+ @Override
+ public void process(WatchedEvent event) {
+ if (Event.EventType.None.equals(event.getType())) {
+ return;
+ }
+
+ log.info("Got even for shutdown {}" + event);
+ if (event.getType().equals(Event.EventType.NodeCreated)) {
+ log.info("Shutdown zk node created, shutting down");
+ shutdown();
+ } else {
+ log.info("Remaking shutdown watcher");
+ Stat stat = null;
+ try {
+ stat = zkClient.exists(CLUSTER_SHUTDOWN, this);
+ } catch (KeeperException e) {
+ SolrException.log(log, e);
+ return;
+ } catch (InterruptedException e) {
+ SolrException.log(log, e);
+ return;
+ }
+ if (stat != null) {
+ log.info("Got shutdown even while remaking watcher, shutting down");
+ shutdown();
+ }
+ }
+ }
+
+ private void shutdown() {
+ Thread updaterThead = overseer.getUpdaterThread();
+ log.info("Cluster shutdown initiated");
+ if (updaterThead!= null && updaterThead.isAlive()) {
+ log.info("We are the Overseer, wait for others to shutdown");
+
+ CountDownLatch latch = new CountDownLatch(1);
+ List<String> children = null;
+ try {
+ children = zkClient.getChildren("/solr" + ZkStateReader.LIVE_NODES_ZKNODE, new Watcher() {
+ @Override
+ public void process(WatchedEvent event) {
+ if (Event.EventType.None.equals(event.getType())) {
+ return;
+ }
+ if (event.getType() == Event.EventType.NodeChildrenChanged) {
+ Thread updaterThead = overseer.getUpdaterThread();
+ if (updaterThead == null || !updaterThead.isAlive()) {
+ log.info("We were the Overseer, but it seems not anymore, shutting down");
+ latch.countDown();
+ return;
+ }
+
+ try {
+ List<String> children = zkClient.getChildren("/solr" + ZkStateReader.LIVE_NODES_ZKNODE, this, false);
+ if (children.size() == 1) {
+ latch.countDown();
+ }
+ } catch (KeeperException e) {
+ log.error("Exception on proper shutdown", e);
+ return;
+ } catch (InterruptedException e) {
+ ParWork.propegateInterrupt(e);
+ return;
+ }
+ }
+ }
+ }, false);
+ } catch (KeeperException e) {
+ log.error("Time out waiting to see solr live nodes go down " + children.size());
+ return;
+ } catch (InterruptedException e) {
+ ParWork.propegateInterrupt(e);
+ return;
+ }
+
+ if (children.size() > 1) {
+ boolean success = false;
+ try {
+ success = latch.await(10, TimeUnit.SECONDS);
+ } catch (InterruptedException e) {
+ ParWork.propegateInterrupt(e);
+ }
+ if (!success) {
+ log.error("Time out waiting to see solr live nodes go down " + children.size());
+ }
+ }
+ }
+
+ ParWork.getExecutor().submit(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ cc.close();
+ } catch (IOException e) {
+ log.error("IOException on shutdown", e);
+ return;
+ }
+ URL url = null;
+ try {
+ url = new URL(getHostName() + ":" + getHostPort() + "/shutdown?token=" + "solrrocks");
+ } catch (MalformedURLException e) {
+ SolrException.log(log, e);
+ return;
+ }
+ try {
+ HttpURLConnection connection = (HttpURLConnection) url.openConnection();
+ connection.setRequestMethod("POST");
+ connection.getResponseCode();
+ log.info("Shutting down " + url + ": " + connection.getResponseCode() + " " + connection.getResponseMessage());
+ } catch (SocketException e) {
+ SolrException.log(log, e);
+ // Okay - the server is not running
+ } catch (IOException e) {
+ SolrException.log(log, e);
+ return;
+ }
+ }
+ });
+ }
+ });
+ } catch (KeeperException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ } catch (InterruptedException e) {
+ ParWork.propegateInterrupt(e);
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ }
try {
- zkClient.mkdir("/cluster_lock");
+ zkClient.mkdirs("/cluster/cluster_lock");
} catch (KeeperException.NodeExistsException e) {
// okay
} catch (KeeperException e) {
@@ -940,7 +1074,7 @@ public class ZkController implements Closeable {
}
boolean createdClusterNodes = false;
try {
- DistributedLock lock = new DistributedLock(zkClient, "/cluster_lock", zkClient.getZkACLProvider().getACLsToAdd("/cluster_lock"));
+ DistributedLock lock = new DistributedLock(zkClient, "/cluster/cluster_lock", zkClient.getZkACLProvider().getACLsToAdd("/cluster/cluster_lock"));
if (log.isDebugEnabled()) log.debug("get cluster lock");
while (!lock.lock()) {
Thread.sleep(250);
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPhrasesIdentificationComponent.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPhrasesIdentificationComponent.java
index 708a39b..777e1e44 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPhrasesIdentificationComponent.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPhrasesIdentificationComponent.java
@@ -108,10 +108,6 @@ public class TestCloudPhrasesIdentificationComponent extends SolrCloudTestCase {
@AfterClass
private static void afterClass() throws Exception {
- if (null != CLOUD_CLIENT) {
- CLOUD_CLIENT.close();
- CLOUD_CLIENT = null;
- }
for (HttpSolrClient client : CLIENTS) {
client.close();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
index 9ff2db3..3f8c68e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
@@ -108,10 +108,6 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
@AfterClass
private static void afterClass() throws Exception {
- if (null != CLOUD_CLIENT) {
- CLOUD_CLIENT.close();
- CLOUD_CLIENT = null;
- }
for (HttpSolrClient client : CLIENTS) {
client.close();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
index 58cf472..a71ed48 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
@@ -155,10 +155,6 @@ public class TestRandomFlRTGCloud extends SolrCloudTestCase {
@AfterClass
private static void afterClass() throws Exception {
- if (null != CLOUD_CLIENT) {
- CLOUD_CLIENT.close();
- CLOUD_CLIENT = null;
- }
for (HttpSolrClient client : CLIENTS) {
client.close();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
index 20c186c..748510b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
@@ -137,9 +137,6 @@ public class TestTolerantUpdateProcessorRandomCloud extends SolrCloudTestCase {
}
}
NODE_CLIENTS = null;
- if (CLOUD_CLIENT != null) {
- CLOUD_CLIENT.close();
- }
CLOUD_CLIENT = null;
}
diff --git a/solr/solrj/src/java/org/apache/solr/common/ParWork.java b/solr/solrj/src/java/org/apache/solr/common/ParWork.java
index 543303e..1161c1d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/ParWork.java
+++ b/solr/solrj/src/java/org/apache/solr/common/ParWork.java
@@ -43,6 +43,7 @@ import java.util.concurrent.atomic.AtomicReference;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
+import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.OrderedExecutor;
import org.apache.solr.common.util.SysStats;
import org.apache.zookeeper.KeeperException;
@@ -667,7 +668,7 @@ public class ParWork implements Closeable {
} else {
if (ignoreExceptions) {
warns.add(t);
- log.error("Error", t);
+ log.error("Error handling close for an object", new ObjectReleaseTracker.ObjectTrackerException(t));
if (t instanceof Error && !(t instanceof AssertionError)) {
throw (Error) t;
}
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
index 922d0f8..058afa8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
@@ -61,6 +61,7 @@ import org.apache.solr.common.ParWorkExecutor;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.StringUtils;
import org.apache.solr.common.cloud.ConnectionManager.IsClosed;
+import org.apache.solr.common.util.CloseTracker;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.ObjectReleaseTracker;
@@ -97,6 +98,7 @@ public class SolrZkClient implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final int zkClientConnectTimeout;
+ private final CloseTracker closeTracker;
private volatile ConnectionManager connManager;
@@ -124,6 +126,7 @@ public class SolrZkClient implements Closeable {
// expert: for tests
public SolrZkClient() {
+ closeTracker = new CloseTracker();
zkClientConnectTimeout = 0;
}
@@ -157,6 +160,7 @@ public class SolrZkClient implements Closeable {
public SolrZkClient(String zkServerAddress, int zkClientTimeout, int clientConnectTimeout,
ZkClientConnectionStrategy strat, final OnReconnect onReconnect, BeforeReconnect beforeReconnect, ZkACLProvider zkACLProvider, IsClosed higherLevelIsClosed) {
ObjectReleaseTracker.track(this);
+ closeTracker = new CloseTracker();
this.zkServerAddress = zkServerAddress;
this.higherLevelIsClosed = higherLevelIsClosed;
if (strat == null) {
@@ -854,6 +858,7 @@ public class SolrZkClient implements Closeable {
}
public void close() {
+ closeTracker.close();
if (isClosed) return; // it's okay if we over close - same as solrcore
isClosed = true;
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index 55d1b3a..98b2938 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -55,6 +55,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.AutoScalingParams;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CoreAdminParams;
+import org.apache.solr.common.util.CloseTracker;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.Pair;
@@ -156,6 +157,7 @@ public class ZkStateReader implements SolrCloseable {
private static final String SOLR_ENVIRONMENT = "environment";
public static final String REPLICA_TYPE = "type";
+ private final CloseTracker closeTracker;
/**
* A view of the current state of all collections; combines all the different state sources into a single view.
@@ -337,6 +339,7 @@ public class ZkStateReader implements SolrCloseable {
}
public ZkStateReader(SolrZkClient zkClient, Runnable securityNodeListener) {
+ closeTracker = new CloseTracker();
this.zkClient = zkClient;
this.configManager = new ZkConfigManager(zkClient);
this.closeClient = false;
@@ -346,6 +349,7 @@ public class ZkStateReader implements SolrCloseable {
public ZkStateReader(String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout) {
+ closeTracker = new CloseTracker();
this.zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout, zkClientConnectTimeout,
// on reconnect, reload cloud info
new OnReconnect() {
@@ -416,7 +420,8 @@ public class ZkStateReader implements SolrCloseable {
public void process(WatchedEvent event) {
if (EventType.None.equals(event.getType())) {
return;
- } log.info("Got event on live node watcher {}", event.toString());
+ }
+ log.info("Got event on live node watcher {}", event.toString());
if (event.getType() == EventType.NodeCreated) {
latch.countDown();
} else {
@@ -891,6 +896,7 @@ public class ZkStateReader implements SolrCloseable {
}
public void close() {
+ closeTracker.close();
this.closed = true;
try (ParWork closer = new ParWork(this)) {
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/CloseTracker.java b/solr/solrj/src/java/org/apache/solr/common/util/CloseTracker.java
new file mode 100644
index 0000000..f0b0a77
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/common/util/CloseTracker.java
@@ -0,0 +1,30 @@
+package org.apache.solr.common.util;
+
+import org.apache.commons.io.output.StringBuilderWriter;
+import org.apache.solr.common.AlreadyClosedException;
+
+import java.io.Closeable;
+import java.io.PrintWriter;
+
+public class CloseTracker implements Closeable {
+ private volatile boolean closed = false;
+ private volatile String closeStack = "";
+
+ @Override
+ public void close() {
+ if (closed) {
+ throw new AlreadyClosedException(closeStack);
+ }
+
+ StringBuilderWriter sw = new StringBuilderWriter(4096);
+ PrintWriter pw = new PrintWriter(sw);
+ new ObjectReleaseTracker.ObjectTrackerException(this.getClass().getName()).printStackTrace(pw);
+ closeStack = sw.toString();
+ closed = true;
+ }
+
+ public boolean isClosed() {
+ return closed;
+ }
+
+}
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ObjectReleaseTracker.java b/solr/solrj/src/java/org/apache/solr/common/util/ObjectReleaseTracker.java
index 36024b0..0b4d40b 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ObjectReleaseTracker.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ObjectReleaseTracker.java
@@ -80,6 +80,10 @@ public class ObjectReleaseTracker {
public ObjectTrackerException(String msg) {
super(msg);
}
+
+ public ObjectTrackerException(Throwable t) {
+ super(t);
+ }
}
}
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
index 91766a0..27e501e 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
@@ -77,6 +77,8 @@ import org.apache.solr.core.CoreContainer;
import org.apache.solr.handler.admin.CollectionsHandler;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
import org.eclipse.jetty.server.handler.HandlerWrapper;
import org.eclipse.jetty.servlet.ServletHolder;
import org.slf4j.Logger;
@@ -143,6 +145,7 @@ public class MiniSolrCloudCluster {
"</solr>\n";
private final Object startupWait = new Object();
+ private final SolrZkClient solrZkClient;
private volatile ZkTestServer zkServer; // non-final due to injectChaos()
private final boolean externalZkServer;
private final List<JettySolrRunner> jettys = new CopyOnWriteArrayList<>();
@@ -332,7 +335,7 @@ public class MiniSolrCloudCluster {
// build the client
solrClient = buildSolrClient();
-
+ solrZkClient = solrClient.getZkStateReader().getZkClient();
if (numServers > 0) {
waitForAllNodes(numServers, STARTUP_WAIT_SECONDS);
}
@@ -651,20 +654,75 @@ public class MiniSolrCloudCluster {
*/
public void shutdown() throws Exception {
try {
+ log.info("creating cluster shutdown zk node");
+ zkServer.getZkClient().mkdirs("/solr" + ZkController.CLUSTER_SHUTDOWN);
+ zkServer.getZkClient().printLayout();
+ zkServer.getZkClient().printLayoutToStream(System.out);
+
+ CountDownLatch latch = new CountDownLatch(1);
+ List<String> children = zkServer.getZkClient().getChildren("/solr" + ZkStateReader.LIVE_NODES_ZKNODE, new Watcher() {
+ @Override
+ public void process(WatchedEvent event) {
+ if (Event.EventType.None.equals(event.getType())) {
+ return;
+ }
+ if (event.getType() == Event.EventType.NodeChildrenChanged) {
+ try {
+ List<String> children = zkServer.getZkClient().getChildren("/solr" + ZkStateReader.LIVE_NODES_ZKNODE, this, false);
+ if (children.size() == 0) {
+ latch.countDown();
+ }
+ } catch (KeeperException e) {
+ log.error("Exception on proper shutdown", e);
+ return;
+ } catch (InterruptedException e) {
+ ParWork.propegateInterrupt(e);
+ return;
+ }
+ }
+ }
+ }, false);
+
+ if (children.size() > 0) {
+ boolean success = latch.await(10, TimeUnit.SECONDS);
+ if (!success) {
+ throw new TimeoutException("Time out waiting to see solr live nodes go down " + children.size());
+ }
+ }
+
+ } catch (KeeperException.NodeExistsException e) {
+ log.info("Shutdown zk node already exists");
+ } catch (Exception e) {
+ log.error("Exception on proper shutdown", e);
+ }
+
+
+ //ZkStateReader reader = zkServer.getZkClient().getZkStateReader();
+
+// try {
+// reader.waitForLiveNodes(10, TimeUnit.SECONDS, (o, n) -> n.size() == 0);
+// } catch (InterruptedException e) {
+// Thread.currentThread().interrupt();
+// throw new SolrException(ErrorCode.SERVER_ERROR, "interrupted");
+// }
+ // Thread.sleep(40000);
+
+ try {
List<Callable<JettySolrRunner>> shutdowns = new ArrayList<>(jettys.size());
for (final JettySolrRunner jetty : jettys) {
- shutdowns.add(() -> stopJettySolrRunner(jetty, false));
+ shutdowns.add(() -> stopJettySolrRunner(jetty, true));
}
jettys.clear();
try (ParWork parWork = new ParWork(this, true)) {
- parWork.collect(solrClient);
parWork.collect(shutdowns);
- parWork.addCollect("jetties&solrClient");
+ parWork.addCollect("jetties");
+ parWork.collect(solrClient);
+ parWork.addCollect("solrClient");
if (!externalZkServer) {
parWork.collect(zkServer);
+ parWork.addCollect("zkServer");
}
- parWork.addCollect("zkServer");
}
} finally {
System.clearProperty("zkHost");
@@ -682,7 +740,7 @@ public class MiniSolrCloudCluster {
}
public SolrZkClient getZkClient() {
- return solrClient.getZkStateReader().getZkClient();
+ return solrZkClient;
}
protected CloudSolrClient buildSolrClient() {
@@ -759,28 +817,31 @@ public class MiniSolrCloudCluster {
}
}
- public synchronized void injectChaos(Random random) throws Exception {
-
- // sometimes we restart one of the jetty nodes
- if (random.nextBoolean()) {
- JettySolrRunner jetty = jettys.get(random.nextInt(jettys.size()));
- jetty.stop();
- log.info("============ Restarting jetty");
- jetty.start();
- }
+ public void injectChaos(Random random) throws Exception {
+ if (LuceneTestCase.TEST_NIGHTLY && false) { // nocommit
+ synchronized (this) {
+ // sometimes we restart one of the jetty nodes
+ if (random.nextBoolean()) {
+ JettySolrRunner jetty = jettys.get(random.nextInt(jettys.size()));
+ jetty.stop();
+ log.info("============ Restarting jetty");
+ jetty.start();
+ }
- // sometimes we restart zookeeper
- if (random.nextBoolean()) {
- zkServer.shutdown();
- log.info("============ Restarting zookeeper");
- zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
- zkServer.run(false);
- }
+ // sometimes we restart zookeeper
+ if (random.nextBoolean()) {
+ zkServer.shutdown();
+ log.info("============ Restarting zookeeper");
+ zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
+ zkServer.run(false);
+ }
- // sometimes we cause a connection loss - sometimes it will hit the overseer
- if (random.nextBoolean()) {
- JettySolrRunner jetty = jettys.get(random.nextInt(jettys.size()));
- ChaosMonkey.causeConnectionLoss(jetty);
+ // sometimes we cause a connection loss - sometimes it will hit the overseer
+ if (random.nextBoolean()) {
+ JettySolrRunner jetty = jettys.get(random.nextInt(jettys.size()));
+ ChaosMonkey.causeConnectionLoss(jetty);
+ }
+ }
}
}
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java b/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java
index f633aba..bbdc485 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java
@@ -49,6 +49,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import javax.management.JMException;
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.ParWork;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.SolrZkClient;
@@ -56,6 +57,7 @@ import org.apache.solr.common.cloud.ZkMaintenanceUtils;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
+import org.apache.solr.common.util.CloseTracker;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.TimeOut;
import org.apache.solr.common.util.TimeSource;
@@ -97,6 +99,8 @@ public class ZkTestServer implements Closeable {
}
}
+ private volatile CloseTracker closeTracker;
+
private Path zkMonitoringFile;
public static final int TIMEOUT = 45000;
@@ -543,18 +547,21 @@ public class ZkTestServer implements Closeable {
run(false);
}
- public void run(boolean solrFormat) throws InterruptedException, IOException {
+ public synchronized void run(boolean solrFormat) throws InterruptedException, IOException {
log.info("STARTING ZK TEST SERVER dataDir={}", this.zkDir);
// docs say no config for netty yet
// System.setProperty("zookeeper.serverCnxnFactory", "org.apache.zookeeper.server.NettyServerCnxnFactory");
// System.setProperty("zookeeper.clientCnxnSocket", "org.apache.zookeeper.ClientCnxnSocketNetty");
-
+ if (zooThread != null) {
+ throw new AlreadyClosedException();
+ }
+ if (closeTracker != null) {
+ throw new AlreadyClosedException();
+ }
+ closeTracker = new CloseTracker();
try {
- if (zooThread != null) {
- throw new IllegalStateException("ZK TEST SERVER IS ALREADY RUNNING");
- }
// we don't call super.distribSetUp
zooThread = new Thread("ZkTestServer Run Thread") {
@@ -620,9 +627,9 @@ public class ZkTestServer implements Closeable {
}
}
- public void shutdown() throws IOException, InterruptedException {
+ public synchronized void shutdown() throws IOException, InterruptedException {
log.info("Shutting down ZkTestServer.");
-
+ closeTracker.close();
try {
if (chRootClient != null) {
chRootClient.printLayout();