You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2016/04/21 23:11:42 UTC
[2/4] lucene-solr:branch_5_5: SOLR-8697: Add synchronization around
registering as leader and canceling.
SOLR-8697: Add synchronization around registering as leader and canceling.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/78bed536
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/78bed536
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/78bed536
Branch: refs/heads/branch_5_5
Commit: 78bed536984dbfd4ba2f802deb58b29979d59329
Parents: f8dd98f
Author: markrmiller <ma...@apache.org>
Authored: Fri Feb 26 12:32:12 2016 -0500
Committer: Anshum Gupta <an...@apache.org>
Committed: Thu Apr 21 13:42:07 2016 -0700
----------------------------------------------------------------------
.../org/apache/solr/cloud/ElectionContext.java | 111 ++++++++++---------
.../org/apache/solr/cloud/ZkController.java | 2 +-
.../org/apache/solr/cloud/OverseerTest.java | 7 ++
3 files changed, 69 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78bed536/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index 871dbcd..d4d02d8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends ElectionContext {
protected String shardId;
protected String collection;
protected LeaderElector leaderElector;
- protected volatile Integer leaderZkNodeParentVersion;
-
+ private Integer leaderZkNodeParentVersion;
+
+ // Prevents a race between cancelling and becoming leader.
+ private final Object lock = new Object();
+
public ShardLeaderElectionContextBase(LeaderElector leaderElector,
final String shardId, final String collection, final String coreNodeName,
ZkNodeProps props, ZkStateReader zkStateReader) {
@@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends ElectionContext {
@Override
public void cancelElection() throws InterruptedException, KeeperException {
super.cancelElection();
- if (leaderZkNodeParentVersion != null) {
- try {
- // We need to be careful and make sure we *only* delete our own leader registration node.
- // We do this by using a multi and ensuring the parent znode of the leader registration node
- // matches the version we expect - there is a setData call that increments the parent's znode
- // version whenever a leader registers.
- log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
- List<Op> ops = new ArrayList<>(2);
- ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
- ops.add(Op.delete(leaderPath, -1));
- zkClient.multi(ops, true);
- } catch (KeeperException.NoNodeException nne) {
- // no problem
- log.info("No leader registration node found to remove: {}", leaderPath);
- } catch (KeeperException.BadVersionException bve) {
- log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
- // no problem
- } catch (InterruptedException e) {
- throw e;
- } catch (Exception e) {
- SolrException.log(log, e);
+ synchronized (lock) {
+ if (leaderZkNodeParentVersion != null) {
+ try {
+ // We need to be careful and make sure we *only* delete our own leader registration node.
+ // We do this by using a multi and ensuring the parent znode of the leader registration node
+ // matches the version we expect - there is a setData call that increments the parent's znode
+ // version whenever a leader registers.
+ log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
+ List<Op> ops = new ArrayList<>(2);
+ ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
+ ops.add(Op.delete(leaderPath, -1));
+ zkClient.multi(ops, true);
+ } catch (KeeperException.NoNodeException nne) {
+ // no problem
+ log.info("No leader registration node found to remove: {}", leaderPath);
+ } catch (KeeperException.BadVersionException bve) {
+ log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
+ // no problem
+ } catch (InterruptedException e) {
+ throw e;
+ } catch (Exception e) {
+ SolrException.log(log, e);
+ }
+ leaderZkNodeParentVersion = null;
+ } else {
+ log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
}
- leaderZkNodeParentVersion = null;
- } else {
- log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
}
}
@@ -179,31 +184,31 @@ class ShardLeaderElectionContextBase extends ElectionContext {
@Override
public void execute() throws InterruptedException, KeeperException {
- log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
- List<Op> ops = new ArrayList<>(2);
-
- // We use a multi operation to get the parent nodes version, which will
- // be used to make sure we only remove our own leader registration node.
- // The setData call used to get the parent version is also the trigger to
- // increment the version. We also do a sanity check that our leaderSeqPath exists.
-
- ops.add(Op.check(leaderSeqPath, -1));
- ops.add(Op.create(leaderPath, json, zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
- // we set the json on the parent too for back compat, see SOLR-7844
- ops.add(Op.setData(parent, json, -1));
- List<OpResult> results;
-
- results = zkClient.multi(ops, true);
-
- for (OpResult result : results) {
- if (result.getType() == ZooDefs.OpCode.setData) {
- SetDataResult dresult = (SetDataResult) result;
- Stat stat = dresult.getStat();
- leaderZkNodeParentVersion = stat.getVersion();
- return;
+ synchronized (lock) {
+ log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
+ List<Op> ops = new ArrayList<>(2);
+
+ // We use a multi operation to get the parent nodes version, which will
+ // be used to make sure we only remove our own leader registration node.
+ // The setData call used to get the parent version is also the trigger to
+ // increment the version. We also do a sanity check that our leaderSeqPath exists.
+
+ ops.add(Op.check(leaderSeqPath, -1));
+ ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
+ ops.add(Op.setData(parent, null, -1));
+ List<OpResult> results;
+
+ results = zkClient.multi(ops, true);
+ for (OpResult result : results) {
+ if (result.getType() == ZooDefs.OpCode.setData) {
+ SetDataResult dresult = (SetDataResult) result;
+ Stat stat = dresult.getStat();
+ leaderZkNodeParentVersion = stat.getVersion();
+ return;
+ }
}
+ assert leaderZkNodeParentVersion != null;
}
- assert leaderZkNodeParentVersion != null;
}
});
} catch (Throwable t) {
@@ -226,7 +231,13 @@ class ShardLeaderElectionContextBase extends ElectionContext {
public LeaderElector getLeaderElector() {
return leaderElector;
- }
+ }
+
+ Integer getLeaderZkNodeParentVersion() {
+ synchronized (lock) {
+ return leaderZkNodeParentVersion;
+ }
+ }
}
// add core container and stop passing core around...
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78bed536/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index e7b5f50..cc8f7a2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -2194,7 +2194,7 @@ public final class ZkController {
// we use this version and multi to ensure *only* the current zk registered leader
// for a shard can put a replica into LIR
- Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
+ Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
// TODO: should we do this optimistically to avoid races?
if (zkClient.exists(znodePath, retryOnConnLoss)) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/78bed536/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
index 108ccbe..98b04b4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
@@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
}
public void close() {
+ for (ElectionContext ec : electionContext.values()) {
+ try {
+ ec.cancelElection();
+ } catch (Exception e) {
+ log.warn(String.format("Error cancelling election for %s", ec.id), e);
+ }
+ }
deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
zkClient.close();
}