You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2016/06/29 11:52:19 UTC
lucene-solr:branch_6x: SOLR-8777: Duplicate Solr process can cripple
a running process (cherry picked from commit 4ea95bf)
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 976501f6f -> 812fd346f
SOLR-8777: Duplicate Solr process can cripple a running process
(cherry picked from commit 4ea95bf)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/812fd346
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/812fd346
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/812fd346
Branch: refs/heads/branch_6x
Commit: 812fd346f7a136ccfe550a6ba0d7b0e634d68769
Parents: 976501f
Author: Shalin Shekhar Mangar <sh...@apache.org>
Authored: Wed Jun 29 14:49:59 2016 +0530
Committer: Shalin Shekhar Mangar <sh...@apache.org>
Committed: Wed Jun 29 16:59:56 2016 +0530
----------------------------------------------------------------------
solr/CHANGES.txt | 4 +-
.../org/apache/solr/cloud/ZkController.java | 96 ++++++++++++++------
2 files changed, 70 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/812fd346/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 817d85e..10f490a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -54,10 +54,12 @@ Bug Fixes
* SOLR-8626: 404 error when clicking nodes in cloud graph view in angular UI. (janhoy, Trey Grainger via shalin)
+* SOLR-8777: Duplicate Solr process can cripple a running process. (Jessica Cheng Mallet, Scott Blum, shalin)
+
* SOLR-9254: GraphTermsQueryQParserPlugin throws NPE when field being searched is not present in segment
(Joel Bernstein)
-* SOLR-8657: Fix SolrRequestInfo error logs if QuerySenderListener is being used (Pascal Chollet,
+* SOLR-8657: Fix SolrRequestInfo error logs if QuerySenderListener is being used (Pascal Chollet,
Tomás Fernández Löbbe)
Optimizations
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/812fd346/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 1388ee5..102774f 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -25,8 +25,19 @@ import java.net.URLEncoder;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
@@ -41,7 +52,25 @@ import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.cloud.*;
+import org.apache.solr.common.cloud.BeforeReconnect;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.ClusterStateUtil;
+import org.apache.solr.common.cloud.DefaultConnectionStrategy;
+import org.apache.solr.common.cloud.DefaultZkACLProvider;
+import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.OnReconnect;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkACLProvider;
+import org.apache.solr.common.cloud.ZkCmdExecutor;
+import org.apache.solr.common.cloud.ZkConfigManager;
+import org.apache.solr.common.cloud.ZkCoreNodeProps;
+import org.apache.solr.common.cloud.ZkCredentialsProvider;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
@@ -642,6 +671,8 @@ public final class ZkController {
zkStateReader.createClusterStateWatchersAndUpdate();
this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
+ checkForExistingEphemeralNode();
+
// start the overseer first as following code may need it's processing
if (!zkRunOnly) {
overseerElector = new LeaderElector(zkClient);
@@ -678,6 +709,39 @@ public final class ZkController {
}
+ private void checkForExistingEphemeralNode() throws KeeperException, InterruptedException {
+ if (zkRunOnly) {
+ return;
+ }
+ String nodeName = getNodeName();
+ String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName;
+
+ if (!zkClient.exists(nodePath, true)) {
+ return;
+ }
+
+ final CountDownLatch deletedLatch = new CountDownLatch(1);
+ Stat stat = zkClient.exists(nodePath, event -> {
+ if (Watcher.Event.EventType.None.equals(event.getType())) {
+ return;
+ }
+ if (Watcher.Event.EventType.NodeDeleted.equals(event.getType())) {
+ deletedLatch.countDown();
+ }
+ }, true);
+
+ if (stat == null) {
+ // znode suddenly disappeared but that's okay
+ return;
+ }
+
+ boolean deleted = deletedLatch.await(zkClient.getSolrZooKeeper().getSessionTimeout() * 2, TimeUnit.MILLISECONDS);
+ if (!deleted) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "A previous ephemeral live node still exists. " +
+ "Solr cannot continue. Please ensure that no other Solr process using the same port is running already.");
+ }
+ }
+
public void publishAndWaitForDownStates() throws KeeperException,
InterruptedException {
@@ -752,33 +816,7 @@ public final class ZkController {
String nodeName = getNodeName();
String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName;
log.info("Register node as live in ZooKeeper:" + nodePath);
-
- try {
- boolean nodeDeleted = true;
- try {
- // we attempt a delete in the case of a quick server bounce -
- // if there was not a graceful close, the node may exist
- // until expiration timeout - so a node won't be created here because
- // it exists, but eventually the node will be removed. So delete
- // in case it exists and create a new node.
- zkClient.delete(nodePath, -1, true);
- } catch (KeeperException.NoNodeException e) {
- // fine if there is nothing to delete
- // TODO: annoying that ZK logs a warning on us
- nodeDeleted = false;
- }
- if (nodeDeleted) {
- log
- .info("Found a previous node that still exists while trying to register a new live node "
- + nodePath + " - removing existing node to create another.");
- }
- zkClient.makePath(nodePath, CreateMode.EPHEMERAL, true);
- } catch (KeeperException e) {
- // it's okay if the node already exists
- if (e.code() != KeeperException.Code.NODEEXISTS) {
- throw e;
- }
- }
+ zkClient.makePath(nodePath, CreateMode.EPHEMERAL, true);
}
public String getNodeName() {