You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2016/02/26 18:32:25 UTC
lucene-solr git commit: SOLR-8697: Add synchronization around
registering as leader and canceling.
Repository: lucene-solr
Updated Branches:
refs/heads/master 0ed625b10 -> efb7bb171
SOLR-8697: Add synchronization around registering as leader and canceling.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
Branch: refs/heads/master
Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
Parents: 0ed625b
Author: markrmiller <ma...@apache.org>
Authored: Fri Feb 26 12:32:12 2016 -0500
Committer: markrmiller <ma...@apache.org>
Committed: Fri Feb 26 12:32:12 2016 -0500
----------------------------------------------------------------------
.../org/apache/solr/cloud/ElectionContext.java | 110 ++++++++++---------
.../org/apache/solr/cloud/ZkController.java | 2 +-
.../org/apache/solr/cloud/OverseerTest.java | 7 ++
3 files changed, 69 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index da4b0c6..6743436 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends ElectionContext {
protected String shardId;
protected String collection;
protected LeaderElector leaderElector;
- protected volatile Integer leaderZkNodeParentVersion;
-
+ private Integer leaderZkNodeParentVersion;
+
+ // Prevents a race between cancelling and becoming leader.
+ private final Object lock = new Object();
+
public ShardLeaderElectionContextBase(LeaderElector leaderElector,
final String shardId, final String collection, final String coreNodeName,
ZkNodeProps props, ZkStateReader zkStateReader) {
@@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends ElectionContext {
@Override
public void cancelElection() throws InterruptedException, KeeperException {
super.cancelElection();
- if (leaderZkNodeParentVersion != null) {
- try {
- // We need to be careful and make sure we *only* delete our own leader registration node.
- // We do this by using a multi and ensuring the parent znode of the leader registration node
- // matches the version we expect - there is a setData call that increments the parent's znode
- // version whenever a leader registers.
- log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
- List<Op> ops = new ArrayList<>(2);
- ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
- ops.add(Op.delete(leaderPath, -1));
- zkClient.multi(ops, true);
- } catch (KeeperException.NoNodeException nne) {
- // no problem
- log.info("No leader registration node found to remove: {}", leaderPath);
- } catch (KeeperException.BadVersionException bve) {
- log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
- // no problem
- } catch (InterruptedException e) {
- throw e;
- } catch (Exception e) {
- SolrException.log(log, e);
+ synchronized (lock) {
+ if (leaderZkNodeParentVersion != null) {
+ try {
+ // We need to be careful and make sure we *only* delete our own leader registration node.
+ // We do this by using a multi and ensuring the parent znode of the leader registration node
+ // matches the version we expect - there is a setData call that increments the parent's znode
+ // version whenever a leader registers.
+ log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
+ List<Op> ops = new ArrayList<>(2);
+ ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
+ ops.add(Op.delete(leaderPath, -1));
+ zkClient.multi(ops, true);
+ } catch (KeeperException.NoNodeException nne) {
+ // no problem
+ log.info("No leader registration node found to remove: {}", leaderPath);
+ } catch (KeeperException.BadVersionException bve) {
+ log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
+ // no problem
+ } catch (InterruptedException e) {
+ throw e;
+ } catch (Exception e) {
+ SolrException.log(log, e);
+ }
+ leaderZkNodeParentVersion = null;
+ } else {
+ log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
}
- leaderZkNodeParentVersion = null;
- } else {
- log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
}
}
@@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends ElectionContext {
@Override
public void execute() throws InterruptedException, KeeperException {
- log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
- List<Op> ops = new ArrayList<>(2);
-
- // We use a multi operation to get the parent nodes version, which will
- // be used to make sure we only remove our own leader registration node.
- // The setData call used to get the parent version is also the trigger to
- // increment the version. We also do a sanity check that our leaderSeqPath exists.
-
- ops.add(Op.check(leaderSeqPath, -1));
- ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
- ops.add(Op.setData(parent, null, -1));
- List<OpResult> results;
-
- results = zkClient.multi(ops, true);
-
- for (OpResult result : results) {
- if (result.getType() == ZooDefs.OpCode.setData) {
- SetDataResult dresult = (SetDataResult) result;
- Stat stat = dresult.getStat();
- leaderZkNodeParentVersion = stat.getVersion();
- return;
+ synchronized (lock) {
+ log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
+ List<Op> ops = new ArrayList<>(2);
+
+ // We use a multi operation to get the parent nodes version, which will
+ // be used to make sure we only remove our own leader registration node.
+ // The setData call used to get the parent version is also the trigger to
+ // increment the version. We also do a sanity check that our leaderSeqPath exists.
+
+ ops.add(Op.check(leaderSeqPath, -1));
+ ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
+ ops.add(Op.setData(parent, null, -1));
+ List<OpResult> results;
+
+ results = zkClient.multi(ops, true);
+ for (OpResult result : results) {
+ if (result.getType() == ZooDefs.OpCode.setData) {
+ SetDataResult dresult = (SetDataResult) result;
+ Stat stat = dresult.getStat();
+ leaderZkNodeParentVersion = stat.getVersion();
+ return;
+ }
}
+ assert leaderZkNodeParentVersion != null;
}
- assert leaderZkNodeParentVersion != null;
}
});
} catch (Throwable t) {
@@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends ElectionContext {
public LeaderElector getLeaderElector() {
return leaderElector;
- }
+ }
+
+ Integer getLeaderZkNodeParentVersion() {
+ synchronized (lock) {
+ return leaderZkNodeParentVersion;
+ }
+ }
}
// add core container and stop passing core around...
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 4c826a7..aba2e59 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -2120,7 +2120,7 @@ public final class ZkController {
// we use this version and multi to ensure *only* the current zk registered leader
// for a shard can put a replica into LIR
- Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
+ Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
// TODO: should we do this optimistically to avoid races?
if (zkClient.exists(znodePath, retryOnConnLoss)) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
index 8ac0512..ea82cbf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
@@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
}
public void close() {
+ for (ElectionContext ec : electionContext.values()) {
+ try {
+ ec.cancelElection();
+ } catch (Exception e) {
+ log.warn(String.format("Error cancelling election for %s", ec.id), e);
+ }
+ }
deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
zkClient.close();
}
Re: lucene-solr git commit: SOLR-8697: Add synchronization around
registering as leader and canceling.
Posted by Mark Miller <ma...@gmail.com>.
Thanks Shalin!
- Mark
On Sat, Feb 27, 2016 at 2:08 AM Shalin Shekhar Mangar <
shalinmangar@gmail.com> wrote:
> I pushed a fix.
>
> On Sat, Feb 27, 2016 at 9:25 AM, Scott Blum <dr...@gmail.com> wrote:
> > That's annoying, since there's no semantic difference between that and a
> > log.warn(fmt, arg, arg). Except that this particular logging API doesn't
> > have an overload that takes a throwable, a format string, and a set of
> args.
> >
> > On Fri, Feb 26, 2016 at 8:02 PM, Chris Hostetter <
> hossman_lucene@fucit.org>
> > wrote:
> >>
> >>
> >> this breaks precommit...
> >>
> >>
> >> [forbidden-apis] Forbidden method invocation:
> >> java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses
> default
> >> locale]
> >> [forbidden-apis] in
> org.apache.solr.cloud.OverseerTest$MockZKController
> >> (OverseerTest.java:128)
> >> [forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
> >> forbidden API invocations (in 1.27s), 1 error(s).
> >>
> >>
> >>
> >>
> >> : Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
> >> : From: markrmiller@apache.org
> >> : Reply-To: dev@lucene.apache.org
> >> : To: commits@lucene.apache.org
> >> : Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
> >> : registering as leader and canceling.
> >> :
> >> : Repository: lucene-solr
> >> : Updated Branches:
> >> : refs/heads/master 0ed625b10 -> efb7bb171
> >> :
> >> :
> >> : SOLR-8697: Add synchronization around registering as leader and
> >> canceling.
> >> :
> >> :
> >> : Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
> >> : Commit:
> >> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
> >> : Tree:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
> >> : Diff:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
> >> :
> >> : Branch: refs/heads/master
> >> : Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
> >> : Parents: 0ed625b
> >> : Author: markrmiller <ma...@apache.org>
> >> : Authored: Fri Feb 26 12:32:12 2016 -0500
> >> : Committer: markrmiller <ma...@apache.org>
> >> : Committed: Fri Feb 26 12:32:12 2016 -0500
> >> :
> >> : ----------------------------------------------------------------------
> >> : .../org/apache/solr/cloud/ElectionContext.java | 110
> >> ++++++++++---------
> >> : .../org/apache/solr/cloud/ZkController.java | 2 +-
> >> : .../org/apache/solr/cloud/OverseerTest.java | 7 ++
> >> : 3 files changed, 69 insertions(+), 50 deletions(-)
> >> : ----------------------------------------------------------------------
> >> :
> >> :
> >> :
> >>
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : ----------------------------------------------------------------------
> >> : diff --git
> >> a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : index da4b0c6..6743436 100644
> >> : --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> : protected String shardId;
> >> : protected String collection;
> >> : protected LeaderElector leaderElector;
> >> : - protected volatile Integer leaderZkNodeParentVersion;
> >> : -
> >> : + private Integer leaderZkNodeParentVersion;
> >> : +
> >> : + // Prevents a race between cancelling and becoming leader.
> >> : + private final Object lock = new Object();
> >> : +
> >> : public ShardLeaderElectionContextBase(LeaderElector leaderElector,
> >> : final String shardId, final String collection, final String
> >> coreNodeName,
> >> : ZkNodeProps props, ZkStateReader zkStateReader) {
> >> : @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> : @Override
> >> : public void cancelElection() throws InterruptedException,
> >> KeeperException {
> >> : super.cancelElection();
> >> : - if (leaderZkNodeParentVersion != null) {
> >> : - try {
> >> : - // We need to be careful and make sure we *only* delete our
> own
> >> leader registration node.
> >> : - // We do this by using a multi and ensuring the parent znode
> of
> >> the leader registration node
> >> : - // matches the version we expect - there is a setData call
> that
> >> increments the parent's znode
> >> : - // version whenever a leader registers.
> >> : - log.info("Removing leader registration node on cancel: {}
> {}",
> >> leaderPath, leaderZkNodeParentVersion);
> >> : - List<Op> ops = new ArrayList<>(2);
> >> : - ops.add(Op.check(new Path(leaderPath).getParent().toString(),
> >> leaderZkNodeParentVersion));
> >> : - ops.add(Op.delete(leaderPath, -1));
> >> : - zkClient.multi(ops, true);
> >> : - } catch (KeeperException.NoNodeException nne) {
> >> : - // no problem
> >> : - log.info("No leader registration node found to remove: {}",
> >> leaderPath);
> >> : - } catch (KeeperException.BadVersionException bve) {
> >> : - log.info("Cannot remove leader registration node because the
> >> current registered node is not ours: {}", leaderPath);
> >> : - // no problem
> >> : - } catch (InterruptedException e) {
> >> : - throw e;
> >> : - } catch (Exception e) {
> >> : - SolrException.log(log, e);
> >> : + synchronized (lock) {
> >> : + if (leaderZkNodeParentVersion != null) {
> >> : + try {
> >> : + // We need to be careful and make sure we *only* delete our
> >> own leader registration node.
> >> : + // We do this by using a multi and ensuring the parent
> znode
> >> of the leader registration node
> >> : + // matches the version we expect - there is a setData call
> >> that increments the parent's znode
> >> : + // version whenever a leader registers.
> >> : + log.info("Removing leader registration node on cancel: {}
> >> {}", leaderPath, leaderZkNodeParentVersion);
> >> : + List<Op> ops = new ArrayList<>(2);
> >> : + ops.add(Op.check(new
> Path(leaderPath).getParent().toString(),
> >> leaderZkNodeParentVersion));
> >> : + ops.add(Op.delete(leaderPath, -1));
> >> : + zkClient.multi(ops, true);
> >> : + } catch (KeeperException.NoNodeException nne) {
> >> : + // no problem
> >> : + log.info("No leader registration node found to remove:
> {}",
> >> leaderPath);
> >> : + } catch (KeeperException.BadVersionException bve) {
> >> : + log.info("Cannot remove leader registration node because
> the
> >> current registered node is not ours: {}", leaderPath);
> >> : + // no problem
> >> : + } catch (InterruptedException e) {
> >> : + throw e;
> >> : + } catch (Exception e) {
> >> : + SolrException.log(log, e);
> >> : + }
> >> : + leaderZkNodeParentVersion = null;
> >> : + } else {
> >> : + log.info("No version found for ephemeral leader parent node,
> >> won't remove previous leader registration.");
> >> : }
> >> : - leaderZkNodeParentVersion = null;
> >> : - } else {
> >> : - log.info("No version found for ephemeral leader parent node,
> >> won't remove previous leader registration.");
> >> : }
> >> : }
> >> :
> >> : @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> :
> >> : @Override
> >> : public void execute() throws InterruptedException,
> >> KeeperException {
> >> : - log.info("Creating leader registration node {} after
> winning
> >> as {}", leaderPath, leaderSeqPath);
> >> : - List<Op> ops = new ArrayList<>(2);
> >> : -
> >> : - // We use a multi operation to get the parent nodes
> version,
> >> which will
> >> : - // be used to make sure we only remove our own leader
> >> registration node.
> >> : - // The setData call used to get the parent version is also
> >> the trigger to
> >> : - // increment the version. We also do a sanity check that
> our
> >> leaderSeqPath exists.
> >> : -
> >> : - ops.add(Op.check(leaderSeqPath, -1));
> >> : - ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> >> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> >> CreateMode.EPHEMERAL));
> >> : - ops.add(Op.setData(parent, null, -1));
> >> : - List<OpResult> results;
> >> : -
> >> : - results = zkClient.multi(ops, true);
> >> : -
> >> : - for (OpResult result : results) {
> >> : - if (result.getType() == ZooDefs.OpCode.setData) {
> >> : - SetDataResult dresult = (SetDataResult) result;
> >> : - Stat stat = dresult.getStat();
> >> : - leaderZkNodeParentVersion = stat.getVersion();
> >> : - return;
> >> : + synchronized (lock) {
> >> : + log.info("Creating leader registration node {} after
> >> winning as {}", leaderPath, leaderSeqPath);
> >> : + List<Op> ops = new ArrayList<>(2);
> >> : +
> >> : + // We use a multi operation to get the parent nodes
> >> version, which will
> >> : + // be used to make sure we only remove our own leader
> >> registration node.
> >> : + // The setData call used to get the parent version is
> also
> >> the trigger to
> >> : + // increment the version. We also do a sanity check that
> >> our leaderSeqPath exists.
> >> : +
> >> : + ops.add(Op.check(leaderSeqPath, -1));
> >> : + ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> >> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> >> CreateMode.EPHEMERAL));
> >> : + ops.add(Op.setData(parent, null, -1));
> >> : + List<OpResult> results;
> >> : +
> >> : + results = zkClient.multi(ops, true);
> >> : + for (OpResult result : results) {
> >> : + if (result.getType() == ZooDefs.OpCode.setData) {
> >> : + SetDataResult dresult = (SetDataResult) result;
> >> : + Stat stat = dresult.getStat();
> >> : + leaderZkNodeParentVersion = stat.getVersion();
> >> : + return;
> >> : + }
> >> : }
> >> : + assert leaderZkNodeParentVersion != null;
> >> : }
> >> : - assert leaderZkNodeParentVersion != null;
> >> : }
> >> : });
> >> : } catch (Throwable t) {
> >> : @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> :
> >> : public LeaderElector getLeaderElector() {
> >> : return leaderElector;
> >> : - }
> >> : + }
> >> : +
> >> : + Integer getLeaderZkNodeParentVersion() {
> >> : + synchronized (lock) {
> >> : + return leaderZkNodeParentVersion;
> >> : + }
> >> : + }
> >> : }
> >> :
> >> : // add core container and stop passing core around...
> >> :
> >> :
> >>
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : ----------------------------------------------------------------------
> >> : diff --git
> a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : index 4c826a7..aba2e59 100644
> >> : --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : @@ -2120,7 +2120,7 @@ public final class ZkController {
> >> : // we use this version and multi to ensure *only* the current zk
> >> registered leader
> >> : // for a shard can put a replica into LIR
> >> :
> >> : - Integer leaderZkNodeParentVersion =
> >> ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
> >> : + Integer leaderZkNodeParentVersion =
> >>
> ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
> >> :
> >> : // TODO: should we do this optimistically to avoid races?
> >> : if (zkClient.exists(znodePath, retryOnConnLoss)) {
> >> :
> >> :
> >>
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : ----------------------------------------------------------------------
> >> : diff --git
> a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : index 8ac0512..ea82cbf 100644
> >> : --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4
> {
> >> : }
> >> :
> >> : public void close() {
> >> : + for (ElectionContext ec : electionContext.values()) {
> >> : + try {
> >> : + ec.cancelElection();
> >> : + } catch (Exception e) {
> >> : + log.warn(String.format("Error cancelling election for %s",
> >> ec.id), e);
> >> : + }
> >> : + }
> >> : deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
> >> : zkClient.close();
> >> : }
> >> :
> >> :
> >>
> >> -Hoss
> >> http://www.lucidworks.com/
> >>
> >> ---------------------------------------------------------------------
> >> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> >> For additional commands, e-mail: dev-help@lucene.apache.org
> >>
> >
>
>
>
> --
> Regards,
> Shalin Shekhar Mangar.
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> For additional commands, e-mail: dev-help@lucene.apache.org
>
> --
- Mark
about.me/markrmiller
Re: lucene-solr git commit: SOLR-8697: Add synchronization around
registering as leader and canceling.
Posted by Shalin Shekhar Mangar <sh...@gmail.com>.
I pushed a fix.
On Sat, Feb 27, 2016 at 9:25 AM, Scott Blum <dr...@gmail.com> wrote:
> That's annoying, since there's no semantic difference between that and a
> log.warn(fmt, arg, arg). Except that this particular logging API doesn't
> have an overload that takes a throwable, a format string, and a set of args.
>
> On Fri, Feb 26, 2016 at 8:02 PM, Chris Hostetter <ho...@fucit.org>
> wrote:
>>
>>
>> this breaks precommit...
>>
>>
>> [forbidden-apis] Forbidden method invocation:
>> java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses default
>> locale]
>> [forbidden-apis] in org.apache.solr.cloud.OverseerTest$MockZKController
>> (OverseerTest.java:128)
>> [forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
>> forbidden API invocations (in 1.27s), 1 error(s).
>>
>>
>>
>>
>> : Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
>> : From: markrmiller@apache.org
>> : Reply-To: dev@lucene.apache.org
>> : To: commits@lucene.apache.org
>> : Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
>> : registering as leader and canceling.
>> :
>> : Repository: lucene-solr
>> : Updated Branches:
>> : refs/heads/master 0ed625b10 -> efb7bb171
>> :
>> :
>> : SOLR-8697: Add synchronization around registering as leader and
>> canceling.
>> :
>> :
>> : Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
>> : Commit:
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
>> : Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
>> : Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
>> :
>> : Branch: refs/heads/master
>> : Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
>> : Parents: 0ed625b
>> : Author: markrmiller <ma...@apache.org>
>> : Authored: Fri Feb 26 12:32:12 2016 -0500
>> : Committer: markrmiller <ma...@apache.org>
>> : Committed: Fri Feb 26 12:32:12 2016 -0500
>> :
>> : ----------------------------------------------------------------------
>> : .../org/apache/solr/cloud/ElectionContext.java | 110
>> ++++++++++---------
>> : .../org/apache/solr/cloud/ZkController.java | 2 +-
>> : .../org/apache/solr/cloud/OverseerTest.java | 7 ++
>> : 3 files changed, 69 insertions(+), 50 deletions(-)
>> : ----------------------------------------------------------------------
>> :
>> :
>> :
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : ----------------------------------------------------------------------
>> : diff --git
>> a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : index da4b0c6..6743436 100644
>> : --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> : protected String shardId;
>> : protected String collection;
>> : protected LeaderElector leaderElector;
>> : - protected volatile Integer leaderZkNodeParentVersion;
>> : -
>> : + private Integer leaderZkNodeParentVersion;
>> : +
>> : + // Prevents a race between cancelling and becoming leader.
>> : + private final Object lock = new Object();
>> : +
>> : public ShardLeaderElectionContextBase(LeaderElector leaderElector,
>> : final String shardId, final String collection, final String
>> coreNodeName,
>> : ZkNodeProps props, ZkStateReader zkStateReader) {
>> : @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> : @Override
>> : public void cancelElection() throws InterruptedException,
>> KeeperException {
>> : super.cancelElection();
>> : - if (leaderZkNodeParentVersion != null) {
>> : - try {
>> : - // We need to be careful and make sure we *only* delete our own
>> leader registration node.
>> : - // We do this by using a multi and ensuring the parent znode of
>> the leader registration node
>> : - // matches the version we expect - there is a setData call that
>> increments the parent's znode
>> : - // version whenever a leader registers.
>> : - log.info("Removing leader registration node on cancel: {} {}",
>> leaderPath, leaderZkNodeParentVersion);
>> : - List<Op> ops = new ArrayList<>(2);
>> : - ops.add(Op.check(new Path(leaderPath).getParent().toString(),
>> leaderZkNodeParentVersion));
>> : - ops.add(Op.delete(leaderPath, -1));
>> : - zkClient.multi(ops, true);
>> : - } catch (KeeperException.NoNodeException nne) {
>> : - // no problem
>> : - log.info("No leader registration node found to remove: {}",
>> leaderPath);
>> : - } catch (KeeperException.BadVersionException bve) {
>> : - log.info("Cannot remove leader registration node because the
>> current registered node is not ours: {}", leaderPath);
>> : - // no problem
>> : - } catch (InterruptedException e) {
>> : - throw e;
>> : - } catch (Exception e) {
>> : - SolrException.log(log, e);
>> : + synchronized (lock) {
>> : + if (leaderZkNodeParentVersion != null) {
>> : + try {
>> : + // We need to be careful and make sure we *only* delete our
>> own leader registration node.
>> : + // We do this by using a multi and ensuring the parent znode
>> of the leader registration node
>> : + // matches the version we expect - there is a setData call
>> that increments the parent's znode
>> : + // version whenever a leader registers.
>> : + log.info("Removing leader registration node on cancel: {}
>> {}", leaderPath, leaderZkNodeParentVersion);
>> : + List<Op> ops = new ArrayList<>(2);
>> : + ops.add(Op.check(new Path(leaderPath).getParent().toString(),
>> leaderZkNodeParentVersion));
>> : + ops.add(Op.delete(leaderPath, -1));
>> : + zkClient.multi(ops, true);
>> : + } catch (KeeperException.NoNodeException nne) {
>> : + // no problem
>> : + log.info("No leader registration node found to remove: {}",
>> leaderPath);
>> : + } catch (KeeperException.BadVersionException bve) {
>> : + log.info("Cannot remove leader registration node because the
>> current registered node is not ours: {}", leaderPath);
>> : + // no problem
>> : + } catch (InterruptedException e) {
>> : + throw e;
>> : + } catch (Exception e) {
>> : + SolrException.log(log, e);
>> : + }
>> : + leaderZkNodeParentVersion = null;
>> : + } else {
>> : + log.info("No version found for ephemeral leader parent node,
>> won't remove previous leader registration.");
>> : }
>> : - leaderZkNodeParentVersion = null;
>> : - } else {
>> : - log.info("No version found for ephemeral leader parent node,
>> won't remove previous leader registration.");
>> : }
>> : }
>> :
>> : @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> :
>> : @Override
>> : public void execute() throws InterruptedException,
>> KeeperException {
>> : - log.info("Creating leader registration node {} after winning
>> as {}", leaderPath, leaderSeqPath);
>> : - List<Op> ops = new ArrayList<>(2);
>> : -
>> : - // We use a multi operation to get the parent nodes version,
>> which will
>> : - // be used to make sure we only remove our own leader
>> registration node.
>> : - // The setData call used to get the parent version is also
>> the trigger to
>> : - // increment the version. We also do a sanity check that our
>> leaderSeqPath exists.
>> : -
>> : - ops.add(Op.check(leaderSeqPath, -1));
>> : - ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
>> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
>> CreateMode.EPHEMERAL));
>> : - ops.add(Op.setData(parent, null, -1));
>> : - List<OpResult> results;
>> : -
>> : - results = zkClient.multi(ops, true);
>> : -
>> : - for (OpResult result : results) {
>> : - if (result.getType() == ZooDefs.OpCode.setData) {
>> : - SetDataResult dresult = (SetDataResult) result;
>> : - Stat stat = dresult.getStat();
>> : - leaderZkNodeParentVersion = stat.getVersion();
>> : - return;
>> : + synchronized (lock) {
>> : + log.info("Creating leader registration node {} after
>> winning as {}", leaderPath, leaderSeqPath);
>> : + List<Op> ops = new ArrayList<>(2);
>> : +
>> : + // We use a multi operation to get the parent nodes
>> version, which will
>> : + // be used to make sure we only remove our own leader
>> registration node.
>> : + // The setData call used to get the parent version is also
>> the trigger to
>> : + // increment the version. We also do a sanity check that
>> our leaderSeqPath exists.
>> : +
>> : + ops.add(Op.check(leaderSeqPath, -1));
>> : + ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
>> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
>> CreateMode.EPHEMERAL));
>> : + ops.add(Op.setData(parent, null, -1));
>> : + List<OpResult> results;
>> : +
>> : + results = zkClient.multi(ops, true);
>> : + for (OpResult result : results) {
>> : + if (result.getType() == ZooDefs.OpCode.setData) {
>> : + SetDataResult dresult = (SetDataResult) result;
>> : + Stat stat = dresult.getStat();
>> : + leaderZkNodeParentVersion = stat.getVersion();
>> : + return;
>> : + }
>> : }
>> : + assert leaderZkNodeParentVersion != null;
>> : }
>> : - assert leaderZkNodeParentVersion != null;
>> : }
>> : });
>> : } catch (Throwable t) {
>> : @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> :
>> : public LeaderElector getLeaderElector() {
>> : return leaderElector;
>> : - }
>> : + }
>> : +
>> : + Integer getLeaderZkNodeParentVersion() {
>> : + synchronized (lock) {
>> : + return leaderZkNodeParentVersion;
>> : + }
>> : + }
>> : }
>> :
>> : // add core container and stop passing core around...
>> :
>> :
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : ----------------------------------------------------------------------
>> : diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : index 4c826a7..aba2e59 100644
>> : --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : @@ -2120,7 +2120,7 @@ public final class ZkController {
>> : // we use this version and multi to ensure *only* the current zk
>> registered leader
>> : // for a shard can put a replica into LIR
>> :
>> : - Integer leaderZkNodeParentVersion =
>> ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
>> : + Integer leaderZkNodeParentVersion =
>> ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
>> :
>> : // TODO: should we do this optimistically to avoid races?
>> : if (zkClient.exists(znodePath, retryOnConnLoss)) {
>> :
>> :
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : ----------------------------------------------------------------------
>> : diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : index 8ac0512..ea82cbf 100644
>> : --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
>> : }
>> :
>> : public void close() {
>> : + for (ElectionContext ec : electionContext.values()) {
>> : + try {
>> : + ec.cancelElection();
>> : + } catch (Exception e) {
>> : + log.warn(String.format("Error cancelling election for %s",
>> ec.id), e);
>> : + }
>> : + }
>> : deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
>> : zkClient.close();
>> : }
>> :
>> :
>>
>> -Hoss
>> http://www.lucidworks.com/
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
>> For additional commands, e-mail: dev-help@lucene.apache.org
>>
>
--
Regards,
Shalin Shekhar Mangar.
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
Re: lucene-solr git commit: SOLR-8697: Add synchronization around
registering as leader and canceling.
Posted by Scott Blum <dr...@gmail.com>.
That's annoying, since there's no semantic difference between that and a
log.warn(fmt, arg, arg). Except that this particular logging API doesn't
have an overload that takes a throwable, a format string, and a set of args.
On Fri, Feb 26, 2016 at 8:02 PM, Chris Hostetter <ho...@fucit.org>
wrote:
>
> this breaks precommit...
>
>
> [forbidden-apis] Forbidden method invocation:
> java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses default
> locale]
> [forbidden-apis] in org.apache.solr.cloud.OverseerTest$MockZKController
> (OverseerTest.java:128)
> [forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
> forbidden API invocations (in 1.27s), 1 error(s).
>
>
>
>
> : Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
> : From: markrmiller@apache.org
> : Reply-To: dev@lucene.apache.org
> : To: commits@lucene.apache.org
> : Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
> : registering as leader and canceling.
> :
> : Repository: lucene-solr
> : Updated Branches:
> : refs/heads/master 0ed625b10 -> efb7bb171
> :
> :
> : SOLR-8697: Add synchronization around registering as leader and
> canceling.
> :
> :
> : Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
> : Commit:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
> : Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
> : Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
> :
> : Branch: refs/heads/master
> : Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
> : Parents: 0ed625b
> : Author: markrmiller <ma...@apache.org>
> : Authored: Fri Feb 26 12:32:12 2016 -0500
> : Committer: markrmiller <ma...@apache.org>
> : Committed: Fri Feb 26 12:32:12 2016 -0500
> :
> : ----------------------------------------------------------------------
> : .../org/apache/solr/cloud/ElectionContext.java | 110
> ++++++++++---------
> : .../org/apache/solr/cloud/ZkController.java | 2 +-
> : .../org/apache/solr/cloud/OverseerTest.java | 7 ++
> : 3 files changed, 69 insertions(+), 50 deletions(-)
> : ----------------------------------------------------------------------
> :
> :
> :
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : ----------------------------------------------------------------------
> : diff --git
> a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : index da4b0c6..6743436 100644
> : --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> : protected String shardId;
> : protected String collection;
> : protected LeaderElector leaderElector;
> : - protected volatile Integer leaderZkNodeParentVersion;
> : -
> : + private Integer leaderZkNodeParentVersion;
> : +
> : + // Prevents a race between cancelling and becoming leader.
> : + private final Object lock = new Object();
> : +
> : public ShardLeaderElectionContextBase(LeaderElector leaderElector,
> : final String shardId, final String collection, final String
> coreNodeName,
> : ZkNodeProps props, ZkStateReader zkStateReader) {
> : @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> : @Override
> : public void cancelElection() throws InterruptedException,
> KeeperException {
> : super.cancelElection();
> : - if (leaderZkNodeParentVersion != null) {
> : - try {
> : - // We need to be careful and make sure we *only* delete our own
> leader registration node.
> : - // We do this by using a multi and ensuring the parent znode of
> the leader registration node
> : - // matches the version we expect - there is a setData call that
> increments the parent's znode
> : - // version whenever a leader registers.
> : - log.info("Removing leader registration node on cancel: {} {}",
> leaderPath, leaderZkNodeParentVersion);
> : - List<Op> ops = new ArrayList<>(2);
> : - ops.add(Op.check(new Path(leaderPath).getParent().toString(),
> leaderZkNodeParentVersion));
> : - ops.add(Op.delete(leaderPath, -1));
> : - zkClient.multi(ops, true);
> : - } catch (KeeperException.NoNodeException nne) {
> : - // no problem
> : - log.info("No leader registration node found to remove: {}",
> leaderPath);
> : - } catch (KeeperException.BadVersionException bve) {
> : - log.info("Cannot remove leader registration node because the
> current registered node is not ours: {}", leaderPath);
> : - // no problem
> : - } catch (InterruptedException e) {
> : - throw e;
> : - } catch (Exception e) {
> : - SolrException.log(log, e);
> : + synchronized (lock) {
> : + if (leaderZkNodeParentVersion != null) {
> : + try {
> : + // We need to be careful and make sure we *only* delete our
> own leader registration node.
> : + // We do this by using a multi and ensuring the parent znode
> of the leader registration node
> : + // matches the version we expect - there is a setData call
> that increments the parent's znode
> : + // version whenever a leader registers.
> : + log.info("Removing leader registration node on cancel: {}
> {}", leaderPath, leaderZkNodeParentVersion);
> : + List<Op> ops = new ArrayList<>(2);
> : + ops.add(Op.check(new Path(leaderPath).getParent().toString(),
> leaderZkNodeParentVersion));
> : + ops.add(Op.delete(leaderPath, -1));
> : + zkClient.multi(ops, true);
> : + } catch (KeeperException.NoNodeException nne) {
> : + // no problem
> : + log.info("No leader registration node found to remove: {}",
> leaderPath);
> : + } catch (KeeperException.BadVersionException bve) {
> : + log.info("Cannot remove leader registration node because the
> current registered node is not ours: {}", leaderPath);
> : + // no problem
> : + } catch (InterruptedException e) {
> : + throw e;
> : + } catch (Exception e) {
> : + SolrException.log(log, e);
> : + }
> : + leaderZkNodeParentVersion = null;
> : + } else {
> : + log.info("No version found for ephemeral leader parent node,
> won't remove previous leader registration.");
> : }
> : - leaderZkNodeParentVersion = null;
> : - } else {
> : - log.info("No version found for ephemeral leader parent node,
> won't remove previous leader registration.");
> : }
> : }
> :
> : @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> :
> : @Override
> : public void execute() throws InterruptedException,
> KeeperException {
> : - log.info("Creating leader registration node {} after winning
> as {}", leaderPath, leaderSeqPath);
> : - List<Op> ops = new ArrayList<>(2);
> : -
> : - // We use a multi operation to get the parent nodes version,
> which will
> : - // be used to make sure we only remove our own leader
> registration node.
> : - // The setData call used to get the parent version is also
> the trigger to
> : - // increment the version. We also do a sanity check that our
> leaderSeqPath exists.
> : -
> : - ops.add(Op.check(leaderSeqPath, -1));
> : - ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> CreateMode.EPHEMERAL));
> : - ops.add(Op.setData(parent, null, -1));
> : - List<OpResult> results;
> : -
> : - results = zkClient.multi(ops, true);
> : -
> : - for (OpResult result : results) {
> : - if (result.getType() == ZooDefs.OpCode.setData) {
> : - SetDataResult dresult = (SetDataResult) result;
> : - Stat stat = dresult.getStat();
> : - leaderZkNodeParentVersion = stat.getVersion();
> : - return;
> : + synchronized (lock) {
> : + log.info("Creating leader registration node {} after
> winning as {}", leaderPath, leaderSeqPath);
> : + List<Op> ops = new ArrayList<>(2);
> : +
> : + // We use a multi operation to get the parent nodes
> version, which will
> : + // be used to make sure we only remove our own leader
> registration node.
> : + // The setData call used to get the parent version is also
> the trigger to
> : + // increment the version. We also do a sanity check that
> our leaderSeqPath exists.
> : +
> : + ops.add(Op.check(leaderSeqPath, -1));
> : + ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> CreateMode.EPHEMERAL));
> : + ops.add(Op.setData(parent, null, -1));
> : + List<OpResult> results;
> : +
> : + results = zkClient.multi(ops, true);
> : + for (OpResult result : results) {
> : + if (result.getType() == ZooDefs.OpCode.setData) {
> : + SetDataResult dresult = (SetDataResult) result;
> : + Stat stat = dresult.getStat();
> : + leaderZkNodeParentVersion = stat.getVersion();
> : + return;
> : + }
> : }
> : + assert leaderZkNodeParentVersion != null;
> : }
> : - assert leaderZkNodeParentVersion != null;
> : }
> : });
> : } catch (Throwable t) {
> : @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> :
> : public LeaderElector getLeaderElector() {
> : return leaderElector;
> : - }
> : + }
> : +
> : + Integer getLeaderZkNodeParentVersion() {
> : + synchronized (lock) {
> : + return leaderZkNodeParentVersion;
> : + }
> : + }
> : }
> :
> : // add core container and stop passing core around...
> :
> :
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : ----------------------------------------------------------------------
> : diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : index 4c826a7..aba2e59 100644
> : --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : @@ -2120,7 +2120,7 @@ public final class ZkController {
> : // we use this version and multi to ensure *only* the current zk
> registered leader
> : // for a shard can put a replica into LIR
> :
> : - Integer leaderZkNodeParentVersion =
> ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
> : + Integer leaderZkNodeParentVersion =
> ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
> :
> : // TODO: should we do this optimistically to avoid races?
> : if (zkClient.exists(znodePath, retryOnConnLoss)) {
> :
> :
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : ----------------------------------------------------------------------
> : diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : index 8ac0512..ea82cbf 100644
> : --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
> : }
> :
> : public void close() {
> : + for (ElectionContext ec : electionContext.values()) {
> : + try {
> : + ec.cancelElection();
> : + } catch (Exception e) {
> : + log.warn(String.format("Error cancelling election for %s",
> ec.id), e);
> : + }
> : + }
> : deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
> : zkClient.close();
> : }
> :
> :
>
> -Hoss
> http://www.lucidworks.com/
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> For additional commands, e-mail: dev-help@lucene.apache.org
>
>
Re: lucene-solr git commit: SOLR-8697: Add synchronization around
registering as leader and canceling.
Posted by Chris Hostetter <ho...@fucit.org>.
this breaks precommit...
[forbidden-apis] Forbidden method invocation:
java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses default
locale]
[forbidden-apis] in org.apache.solr.cloud.OverseerTest$MockZKController
(OverseerTest.java:128)
[forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
forbidden API invocations (in 1.27s), 1 error(s).
: Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
: From: markrmiller@apache.org
: Reply-To: dev@lucene.apache.org
: To: commits@lucene.apache.org
: Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
: registering as leader and canceling.
:
: Repository: lucene-solr
: Updated Branches:
: refs/heads/master 0ed625b10 -> efb7bb171
:
:
: SOLR-8697: Add synchronization around registering as leader and canceling.
:
:
: Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
: Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
: Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
: Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
:
: Branch: refs/heads/master
: Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
: Parents: 0ed625b
: Author: markrmiller <ma...@apache.org>
: Authored: Fri Feb 26 12:32:12 2016 -0500
: Committer: markrmiller <ma...@apache.org>
: Committed: Fri Feb 26 12:32:12 2016 -0500
:
: ----------------------------------------------------------------------
: .../org/apache/solr/cloud/ElectionContext.java | 110 ++++++++++---------
: .../org/apache/solr/cloud/ZkController.java | 2 +-
: .../org/apache/solr/cloud/OverseerTest.java | 7 ++
: 3 files changed, 69 insertions(+), 50 deletions(-)
: ----------------------------------------------------------------------
:
:
: http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: ----------------------------------------------------------------------
: diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: index da4b0c6..6743436 100644
: --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends ElectionContext {
: protected String shardId;
: protected String collection;
: protected LeaderElector leaderElector;
: - protected volatile Integer leaderZkNodeParentVersion;
: -
: + private Integer leaderZkNodeParentVersion;
: +
: + // Prevents a race between cancelling and becoming leader.
: + private final Object lock = new Object();
: +
: public ShardLeaderElectionContextBase(LeaderElector leaderElector,
: final String shardId, final String collection, final String coreNodeName,
: ZkNodeProps props, ZkStateReader zkStateReader) {
: @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends ElectionContext {
: @Override
: public void cancelElection() throws InterruptedException, KeeperException {
: super.cancelElection();
: - if (leaderZkNodeParentVersion != null) {
: - try {
: - // We need to be careful and make sure we *only* delete our own leader registration node.
: - // We do this by using a multi and ensuring the parent znode of the leader registration node
: - // matches the version we expect - there is a setData call that increments the parent's znode
: - // version whenever a leader registers.
: - log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
: - List<Op> ops = new ArrayList<>(2);
: - ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
: - ops.add(Op.delete(leaderPath, -1));
: - zkClient.multi(ops, true);
: - } catch (KeeperException.NoNodeException nne) {
: - // no problem
: - log.info("No leader registration node found to remove: {}", leaderPath);
: - } catch (KeeperException.BadVersionException bve) {
: - log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
: - // no problem
: - } catch (InterruptedException e) {
: - throw e;
: - } catch (Exception e) {
: - SolrException.log(log, e);
: + synchronized (lock) {
: + if (leaderZkNodeParentVersion != null) {
: + try {
: + // We need to be careful and make sure we *only* delete our own leader registration node.
: + // We do this by using a multi and ensuring the parent znode of the leader registration node
: + // matches the version we expect - there is a setData call that increments the parent's znode
: + // version whenever a leader registers.
: + log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
: + List<Op> ops = new ArrayList<>(2);
: + ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
: + ops.add(Op.delete(leaderPath, -1));
: + zkClient.multi(ops, true);
: + } catch (KeeperException.NoNodeException nne) {
: + // no problem
: + log.info("No leader registration node found to remove: {}", leaderPath);
: + } catch (KeeperException.BadVersionException bve) {
: + log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
: + // no problem
: + } catch (InterruptedException e) {
: + throw e;
: + } catch (Exception e) {
: + SolrException.log(log, e);
: + }
: + leaderZkNodeParentVersion = null;
: + } else {
: + log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
: }
: - leaderZkNodeParentVersion = null;
: - } else {
: - log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
: }
: }
:
: @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends ElectionContext {
:
: @Override
: public void execute() throws InterruptedException, KeeperException {
: - log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
: - List<Op> ops = new ArrayList<>(2);
: -
: - // We use a multi operation to get the parent nodes version, which will
: - // be used to make sure we only remove our own leader registration node.
: - // The setData call used to get the parent version is also the trigger to
: - // increment the version. We also do a sanity check that our leaderSeqPath exists.
: -
: - ops.add(Op.check(leaderSeqPath, -1));
: - ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
: - ops.add(Op.setData(parent, null, -1));
: - List<OpResult> results;
: -
: - results = zkClient.multi(ops, true);
: -
: - for (OpResult result : results) {
: - if (result.getType() == ZooDefs.OpCode.setData) {
: - SetDataResult dresult = (SetDataResult) result;
: - Stat stat = dresult.getStat();
: - leaderZkNodeParentVersion = stat.getVersion();
: - return;
: + synchronized (lock) {
: + log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
: + List<Op> ops = new ArrayList<>(2);
: +
: + // We use a multi operation to get the parent nodes version, which will
: + // be used to make sure we only remove our own leader registration node.
: + // The setData call used to get the parent version is also the trigger to
: + // increment the version. We also do a sanity check that our leaderSeqPath exists.
: +
: + ops.add(Op.check(leaderSeqPath, -1));
: + ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
: + ops.add(Op.setData(parent, null, -1));
: + List<OpResult> results;
: +
: + results = zkClient.multi(ops, true);
: + for (OpResult result : results) {
: + if (result.getType() == ZooDefs.OpCode.setData) {
: + SetDataResult dresult = (SetDataResult) result;
: + Stat stat = dresult.getStat();
: + leaderZkNodeParentVersion = stat.getVersion();
: + return;
: + }
: }
: + assert leaderZkNodeParentVersion != null;
: }
: - assert leaderZkNodeParentVersion != null;
: }
: });
: } catch (Throwable t) {
: @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends ElectionContext {
:
: public LeaderElector getLeaderElector() {
: return leaderElector;
: - }
: + }
: +
: + Integer getLeaderZkNodeParentVersion() {
: + synchronized (lock) {
: + return leaderZkNodeParentVersion;
: + }
: + }
: }
:
: // add core container and stop passing core around...
:
: http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: ----------------------------------------------------------------------
: diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: index 4c826a7..aba2e59 100644
: --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: @@ -2120,7 +2120,7 @@ public final class ZkController {
: // we use this version and multi to ensure *only* the current zk registered leader
: // for a shard can put a replica into LIR
:
: - Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
: + Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
:
: // TODO: should we do this optimistically to avoid races?
: if (zkClient.exists(znodePath, retryOnConnLoss)) {
:
: http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: ----------------------------------------------------------------------
: diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: index 8ac0512..ea82cbf 100644
: --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
: }
:
: public void close() {
: + for (ElectionContext ec : electionContext.values()) {
: + try {
: + ec.cancelElection();
: + } catch (Exception e) {
: + log.warn(String.format("Error cancelling election for %s", ec.id), e);
: + }
: + }
: deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
: zkClient.close();
: }
:
:
-Hoss
http://www.lucidworks.com/
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org