You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2016/02/26 18:32:25 UTC

lucene-solr git commit: SOLR-8697: Add synchronization around registering as leader and canceling.

Repository: lucene-solr
Updated Branches:
  refs/heads/master 0ed625b10 -> efb7bb171


SOLR-8697: Add synchronization around registering as leader and canceling.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17

Branch: refs/heads/master
Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
Parents: 0ed625b
Author: markrmiller <ma...@apache.org>
Authored: Fri Feb 26 12:32:12 2016 -0500
Committer: markrmiller <ma...@apache.org>
Committed: Fri Feb 26 12:32:12 2016 -0500

----------------------------------------------------------------------
 .../org/apache/solr/cloud/ElectionContext.java  | 110 ++++++++++---------
 .../org/apache/solr/cloud/ZkController.java     |   2 +-
 .../org/apache/solr/cloud/OverseerTest.java     |   7 ++
 3 files changed, 69 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index da4b0c6..6743436 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   protected String shardId;
   protected String collection;
   protected LeaderElector leaderElector;
-  protected volatile Integer leaderZkNodeParentVersion;
-  
+  private Integer leaderZkNodeParentVersion;
+
+  // Prevents a race between cancelling and becoming leader.
+  private final Object lock = new Object();
+
   public ShardLeaderElectionContextBase(LeaderElector leaderElector,
       final String shardId, final String collection, final String coreNodeName,
       ZkNodeProps props, ZkStateReader zkStateReader) {
@@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   @Override
   public void cancelElection() throws InterruptedException, KeeperException {
     super.cancelElection();
-    if (leaderZkNodeParentVersion != null) {
-      try {
-        // We need to be careful and make sure we *only* delete our own leader registration node.
-        // We do this by using a multi and ensuring the parent znode of the leader registration node
-        // matches the version we expect - there is a setData call that increments the parent's znode
-        // version whenever a leader registers.
-        log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
-        List<Op> ops = new ArrayList<>(2);
-        ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
-        ops.add(Op.delete(leaderPath, -1));
-        zkClient.multi(ops, true);
-      } catch (KeeperException.NoNodeException nne) {
-        // no problem
-        log.info("No leader registration node found to remove: {}", leaderPath);
-      } catch (KeeperException.BadVersionException bve) {
-        log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
-        // no problem
-      } catch (InterruptedException e) {
-        throw e;
-      } catch (Exception e) {
-        SolrException.log(log, e);
+    synchronized (lock) {
+      if (leaderZkNodeParentVersion != null) {
+        try {
+          // We need to be careful and make sure we *only* delete our own leader registration node.
+          // We do this by using a multi and ensuring the parent znode of the leader registration node
+          // matches the version we expect - there is a setData call that increments the parent's znode
+          // version whenever a leader registers.
+          log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
+          List<Op> ops = new ArrayList<>(2);
+          ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
+          ops.add(Op.delete(leaderPath, -1));
+          zkClient.multi(ops, true);
+        } catch (KeeperException.NoNodeException nne) {
+          // no problem
+          log.info("No leader registration node found to remove: {}", leaderPath);
+        } catch (KeeperException.BadVersionException bve) {
+          log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
+          // no problem
+        } catch (InterruptedException e) {
+          throw e;
+        } catch (Exception e) {
+          SolrException.log(log, e);
+        }
+        leaderZkNodeParentVersion = null;
+      } else {
+        log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
       }
-      leaderZkNodeParentVersion = null;
-    } else {
-      log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
     }
   }
   
@@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends ElectionContext {
         
         @Override
         public void execute() throws InterruptedException, KeeperException {
-          log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
-          List<Op> ops = new ArrayList<>(2);
-          
-          // We use a multi operation to get the parent nodes version, which will
-          // be used to make sure we only remove our own leader registration node.
-          // The setData call used to get the parent version is also the trigger to
-          // increment the version. We also do a sanity check that our leaderSeqPath exists.
-          
-          ops.add(Op.check(leaderSeqPath, -1));
-          ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
-          ops.add(Op.setData(parent, null, -1));
-          List<OpResult> results;
-          
-          results = zkClient.multi(ops, true);
-          
-          for (OpResult result : results) {
-            if (result.getType() == ZooDefs.OpCode.setData) {
-              SetDataResult dresult = (SetDataResult) result;
-              Stat stat = dresult.getStat();
-              leaderZkNodeParentVersion = stat.getVersion();
-              return;
+          synchronized (lock) {
+            log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
+            List<Op> ops = new ArrayList<>(2);
+
+            // We use a multi operation to get the parent nodes version, which will
+            // be used to make sure we only remove our own leader registration node.
+            // The setData call used to get the parent version is also the trigger to
+            // increment the version. We also do a sanity check that our leaderSeqPath exists.
+
+            ops.add(Op.check(leaderSeqPath, -1));
+            ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
+            ops.add(Op.setData(parent, null, -1));
+            List<OpResult> results;
+
+            results = zkClient.multi(ops, true);
+            for (OpResult result : results) {
+              if (result.getType() == ZooDefs.OpCode.setData) {
+                SetDataResult dresult = (SetDataResult) result;
+                Stat stat = dresult.getStat();
+                leaderZkNodeParentVersion = stat.getVersion();
+                return;
+              }
             }
+            assert leaderZkNodeParentVersion != null;
           }
-          assert leaderZkNodeParentVersion != null;
         }
       });
     } catch (Throwable t) {
@@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends ElectionContext {
 
   public LeaderElector getLeaderElector() {
     return leaderElector;
-  }  
+  }
+
+  Integer getLeaderZkNodeParentVersion() {
+    synchronized (lock) {
+      return leaderZkNodeParentVersion;
+    }
+  }
 }
 
 // add core container and stop passing core around...

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 4c826a7..aba2e59 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -2120,7 +2120,7 @@ public final class ZkController {
     // we use this version and multi to ensure *only* the current zk registered leader
     // for a shard can put a replica into LIR
     
-    Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
+    Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
     
     // TODO: should we do this optimistically to avoid races?
     if (zkClient.exists(znodePath, retryOnConnLoss)) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
index 8ac0512..ea82cbf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
@@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
     }
 
     public void close() {
+      for (ElectionContext ec : electionContext.values()) {
+        try {
+          ec.cancelElection();
+        } catch (Exception e) {
+          log.warn(String.format("Error cancelling election for %s", ec.id), e);
+        }
+      }
       deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
       zkClient.close();
     }


Re: lucene-solr git commit: SOLR-8697: Add synchronization around registering as leader and canceling.

Posted by Mark Miller <ma...@gmail.com>.
Thanks Shalin!

- Mark

On Sat, Feb 27, 2016 at 2:08 AM Shalin Shekhar Mangar <
shalinmangar@gmail.com> wrote:

> I pushed a fix.
>
> On Sat, Feb 27, 2016 at 9:25 AM, Scott Blum <dr...@gmail.com> wrote:
> > That's annoying, since there's no semantic difference between that and a
> > log.warn(fmt, arg, arg).  Except that this particular logging API doesn't
> > have an overload that takes a throwable, a format string, and a set of
> args.
> >
> > On Fri, Feb 26, 2016 at 8:02 PM, Chris Hostetter <
> hossman_lucene@fucit.org>
> > wrote:
> >>
> >>
> >> this breaks precommit...
> >>
> >>
> >> [forbidden-apis] Forbidden method invocation:
> >> java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses
> default
> >> locale]
> >> [forbidden-apis]   in
> org.apache.solr.cloud.OverseerTest$MockZKController
> >> (OverseerTest.java:128)
> >> [forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
> >> forbidden API invocations (in 1.27s), 1 error(s).
> >>
> >>
> >>
> >>
> >> : Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
> >> : From: markrmiller@apache.org
> >> : Reply-To: dev@lucene.apache.org
> >> : To: commits@lucene.apache.org
> >> : Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
> >> :     registering as leader and canceling.
> >> :
> >> : Repository: lucene-solr
> >> : Updated Branches:
> >> :   refs/heads/master 0ed625b10 -> efb7bb171
> >> :
> >> :
> >> : SOLR-8697: Add synchronization around registering as leader and
> >> canceling.
> >> :
> >> :
> >> : Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
> >> : Commit:
> >> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
> >> : Tree:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
> >> : Diff:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
> >> :
> >> : Branch: refs/heads/master
> >> : Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
> >> : Parents: 0ed625b
> >> : Author: markrmiller <ma...@apache.org>
> >> : Authored: Fri Feb 26 12:32:12 2016 -0500
> >> : Committer: markrmiller <ma...@apache.org>
> >> : Committed: Fri Feb 26 12:32:12 2016 -0500
> >> :
> >> : ----------------------------------------------------------------------
> >> :  .../org/apache/solr/cloud/ElectionContext.java  | 110
> >> ++++++++++---------
> >> :  .../org/apache/solr/cloud/ZkController.java     |   2 +-
> >> :  .../org/apache/solr/cloud/OverseerTest.java     |   7 ++
> >> :  3 files changed, 69 insertions(+), 50 deletions(-)
> >> : ----------------------------------------------------------------------
> >> :
> >> :
> >> :
> >>
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : ----------------------------------------------------------------------
> >> : diff --git
> >> a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : index da4b0c6..6743436 100644
> >> : --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> >> : @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> :    protected String shardId;
> >> :    protected String collection;
> >> :    protected LeaderElector leaderElector;
> >> : -  protected volatile Integer leaderZkNodeParentVersion;
> >> : -
> >> : +  private Integer leaderZkNodeParentVersion;
> >> : +
> >> : +  // Prevents a race between cancelling and becoming leader.
> >> : +  private final Object lock = new Object();
> >> : +
> >> :    public ShardLeaderElectionContextBase(LeaderElector leaderElector,
> >> :        final String shardId, final String collection, final String
> >> coreNodeName,
> >> :        ZkNodeProps props, ZkStateReader zkStateReader) {
> >> : @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> :    @Override
> >> :    public void cancelElection() throws InterruptedException,
> >> KeeperException {
> >> :      super.cancelElection();
> >> : -    if (leaderZkNodeParentVersion != null) {
> >> : -      try {
> >> : -        // We need to be careful and make sure we *only* delete our
> own
> >> leader registration node.
> >> : -        // We do this by using a multi and ensuring the parent znode
> of
> >> the leader registration node
> >> : -        // matches the version we expect - there is a setData call
> that
> >> increments the parent's znode
> >> : -        // version whenever a leader registers.
> >> : -        log.info("Removing leader registration node on cancel: {}
> {}",
> >> leaderPath, leaderZkNodeParentVersion);
> >> : -        List<Op> ops = new ArrayList<>(2);
> >> : -        ops.add(Op.check(new Path(leaderPath).getParent().toString(),
> >> leaderZkNodeParentVersion));
> >> : -        ops.add(Op.delete(leaderPath, -1));
> >> : -        zkClient.multi(ops, true);
> >> : -      } catch (KeeperException.NoNodeException nne) {
> >> : -        // no problem
> >> : -        log.info("No leader registration node found to remove: {}",
> >> leaderPath);
> >> : -      } catch (KeeperException.BadVersionException bve) {
> >> : -        log.info("Cannot remove leader registration node because the
> >> current registered node is not ours: {}", leaderPath);
> >> : -        // no problem
> >> : -      } catch (InterruptedException e) {
> >> : -        throw e;
> >> : -      } catch (Exception e) {
> >> : -        SolrException.log(log, e);
> >> : +    synchronized (lock) {
> >> : +      if (leaderZkNodeParentVersion != null) {
> >> : +        try {
> >> : +          // We need to be careful and make sure we *only* delete our
> >> own leader registration node.
> >> : +          // We do this by using a multi and ensuring the parent
> znode
> >> of the leader registration node
> >> : +          // matches the version we expect - there is a setData call
> >> that increments the parent's znode
> >> : +          // version whenever a leader registers.
> >> : +          log.info("Removing leader registration node on cancel: {}
> >> {}", leaderPath, leaderZkNodeParentVersion);
> >> : +          List<Op> ops = new ArrayList<>(2);
> >> : +          ops.add(Op.check(new
> Path(leaderPath).getParent().toString(),
> >> leaderZkNodeParentVersion));
> >> : +          ops.add(Op.delete(leaderPath, -1));
> >> : +          zkClient.multi(ops, true);
> >> : +        } catch (KeeperException.NoNodeException nne) {
> >> : +          // no problem
> >> : +          log.info("No leader registration node found to remove:
> {}",
> >> leaderPath);
> >> : +        } catch (KeeperException.BadVersionException bve) {
> >> : +          log.info("Cannot remove leader registration node because
> the
> >> current registered node is not ours: {}", leaderPath);
> >> : +          // no problem
> >> : +        } catch (InterruptedException e) {
> >> : +          throw e;
> >> : +        } catch (Exception e) {
> >> : +          SolrException.log(log, e);
> >> : +        }
> >> : +        leaderZkNodeParentVersion = null;
> >> : +      } else {
> >> : +        log.info("No version found for ephemeral leader parent node,
> >> won't remove previous leader registration.");
> >> :        }
> >> : -      leaderZkNodeParentVersion = null;
> >> : -    } else {
> >> : -      log.info("No version found for ephemeral leader parent node,
> >> won't remove previous leader registration.");
> >> :      }
> >> :    }
> >> :
> >> : @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> :
> >> :          @Override
> >> :          public void execute() throws InterruptedException,
> >> KeeperException {
> >> : -          log.info("Creating leader registration node {} after
> winning
> >> as {}", leaderPath, leaderSeqPath);
> >> : -          List<Op> ops = new ArrayList<>(2);
> >> : -
> >> : -          // We use a multi operation to get the parent nodes
> version,
> >> which will
> >> : -          // be used to make sure we only remove our own leader
> >> registration node.
> >> : -          // The setData call used to get the parent version is also
> >> the trigger to
> >> : -          // increment the version. We also do a sanity check that
> our
> >> leaderSeqPath exists.
> >> : -
> >> : -          ops.add(Op.check(leaderSeqPath, -1));
> >> : -          ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> >> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> >> CreateMode.EPHEMERAL));
> >> : -          ops.add(Op.setData(parent, null, -1));
> >> : -          List<OpResult> results;
> >> : -
> >> : -          results = zkClient.multi(ops, true);
> >> : -
> >> : -          for (OpResult result : results) {
> >> : -            if (result.getType() == ZooDefs.OpCode.setData) {
> >> : -              SetDataResult dresult = (SetDataResult) result;
> >> : -              Stat stat = dresult.getStat();
> >> : -              leaderZkNodeParentVersion = stat.getVersion();
> >> : -              return;
> >> : +          synchronized (lock) {
> >> : +            log.info("Creating leader registration node {} after
> >> winning as {}", leaderPath, leaderSeqPath);
> >> : +            List<Op> ops = new ArrayList<>(2);
> >> : +
> >> : +            // We use a multi operation to get the parent nodes
> >> version, which will
> >> : +            // be used to make sure we only remove our own leader
> >> registration node.
> >> : +            // The setData call used to get the parent version is
> also
> >> the trigger to
> >> : +            // increment the version. We also do a sanity check that
> >> our leaderSeqPath exists.
> >> : +
> >> : +            ops.add(Op.check(leaderSeqPath, -1));
> >> : +            ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> >> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> >> CreateMode.EPHEMERAL));
> >> : +            ops.add(Op.setData(parent, null, -1));
> >> : +            List<OpResult> results;
> >> : +
> >> : +            results = zkClient.multi(ops, true);
> >> : +            for (OpResult result : results) {
> >> : +              if (result.getType() == ZooDefs.OpCode.setData) {
> >> : +                SetDataResult dresult = (SetDataResult) result;
> >> : +                Stat stat = dresult.getStat();
> >> : +                leaderZkNodeParentVersion = stat.getVersion();
> >> : +                return;
> >> : +              }
> >> :              }
> >> : +            assert leaderZkNodeParentVersion != null;
> >> :            }
> >> : -          assert leaderZkNodeParentVersion != null;
> >> :          }
> >> :        });
> >> :      } catch (Throwable t) {
> >> : @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends
> >> ElectionContext {
> >> :
> >> :    public LeaderElector getLeaderElector() {
> >> :      return leaderElector;
> >> : -  }
> >> : +  }
> >> : +
> >> : +  Integer getLeaderZkNodeParentVersion() {
> >> : +    synchronized (lock) {
> >> : +      return leaderZkNodeParentVersion;
> >> : +    }
> >> : +  }
> >> :  }
> >> :
> >> :  // add core container and stop passing core around...
> >> :
> >> :
> >>
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : ----------------------------------------------------------------------
> >> : diff --git
> a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : index 4c826a7..aba2e59 100644
> >> : --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> >> : @@ -2120,7 +2120,7 @@ public final class ZkController {
> >> :      // we use this version and multi to ensure *only* the current zk
> >> registered leader
> >> :      // for a shard can put a replica into LIR
> >> :
> >> : -    Integer leaderZkNodeParentVersion =
> >> ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
> >> : +    Integer leaderZkNodeParentVersion =
> >>
> ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
> >> :
> >> :      // TODO: should we do this optimistically to avoid races?
> >> :      if (zkClient.exists(znodePath, retryOnConnLoss)) {
> >> :
> >> :
> >>
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : ----------------------------------------------------------------------
> >> : diff --git
> a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : index 8ac0512..ea82cbf 100644
> >> : --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> >> : @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4
> {
> >> :      }
> >> :
> >> :      public void close() {
> >> : +      for (ElectionContext ec : electionContext.values()) {
> >> : +        try {
> >> : +          ec.cancelElection();
> >> : +        } catch (Exception e) {
> >> : +          log.warn(String.format("Error cancelling election for %s",
> >> ec.id), e);
> >> : +        }
> >> : +      }
> >> :        deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
> >> :        zkClient.close();
> >> :      }
> >> :
> >> :
> >>
> >> -Hoss
> >> http://www.lucidworks.com/
> >>
> >> ---------------------------------------------------------------------
> >> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> >> For additional commands, e-mail: dev-help@lucene.apache.org
> >>
> >
>
>
>
> --
> Regards,
> Shalin Shekhar Mangar.
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> For additional commands, e-mail: dev-help@lucene.apache.org
>
> --
- Mark
about.me/markrmiller

Re: lucene-solr git commit: SOLR-8697: Add synchronization around registering as leader and canceling.

Posted by Shalin Shekhar Mangar <sh...@gmail.com>.
I pushed a fix.

On Sat, Feb 27, 2016 at 9:25 AM, Scott Blum <dr...@gmail.com> wrote:
> That's annoying, since there's no semantic difference between that and a
> log.warn(fmt, arg, arg).  Except that this particular logging API doesn't
> have an overload that takes a throwable, a format string, and a set of args.
>
> On Fri, Feb 26, 2016 at 8:02 PM, Chris Hostetter <ho...@fucit.org>
> wrote:
>>
>>
>> this breaks precommit...
>>
>>
>> [forbidden-apis] Forbidden method invocation:
>> java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses default
>> locale]
>> [forbidden-apis]   in org.apache.solr.cloud.OverseerTest$MockZKController
>> (OverseerTest.java:128)
>> [forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
>> forbidden API invocations (in 1.27s), 1 error(s).
>>
>>
>>
>>
>> : Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
>> : From: markrmiller@apache.org
>> : Reply-To: dev@lucene.apache.org
>> : To: commits@lucene.apache.org
>> : Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
>> :     registering as leader and canceling.
>> :
>> : Repository: lucene-solr
>> : Updated Branches:
>> :   refs/heads/master 0ed625b10 -> efb7bb171
>> :
>> :
>> : SOLR-8697: Add synchronization around registering as leader and
>> canceling.
>> :
>> :
>> : Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
>> : Commit:
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
>> : Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
>> : Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
>> :
>> : Branch: refs/heads/master
>> : Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
>> : Parents: 0ed625b
>> : Author: markrmiller <ma...@apache.org>
>> : Authored: Fri Feb 26 12:32:12 2016 -0500
>> : Committer: markrmiller <ma...@apache.org>
>> : Committed: Fri Feb 26 12:32:12 2016 -0500
>> :
>> : ----------------------------------------------------------------------
>> :  .../org/apache/solr/cloud/ElectionContext.java  | 110
>> ++++++++++---------
>> :  .../org/apache/solr/cloud/ZkController.java     |   2 +-
>> :  .../org/apache/solr/cloud/OverseerTest.java     |   7 ++
>> :  3 files changed, 69 insertions(+), 50 deletions(-)
>> : ----------------------------------------------------------------------
>> :
>> :
>> :
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : ----------------------------------------------------------------------
>> : diff --git
>> a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : index da4b0c6..6743436 100644
>> : --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
>> : @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> :    protected String shardId;
>> :    protected String collection;
>> :    protected LeaderElector leaderElector;
>> : -  protected volatile Integer leaderZkNodeParentVersion;
>> : -
>> : +  private Integer leaderZkNodeParentVersion;
>> : +
>> : +  // Prevents a race between cancelling and becoming leader.
>> : +  private final Object lock = new Object();
>> : +
>> :    public ShardLeaderElectionContextBase(LeaderElector leaderElector,
>> :        final String shardId, final String collection, final String
>> coreNodeName,
>> :        ZkNodeProps props, ZkStateReader zkStateReader) {
>> : @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> :    @Override
>> :    public void cancelElection() throws InterruptedException,
>> KeeperException {
>> :      super.cancelElection();
>> : -    if (leaderZkNodeParentVersion != null) {
>> : -      try {
>> : -        // We need to be careful and make sure we *only* delete our own
>> leader registration node.
>> : -        // We do this by using a multi and ensuring the parent znode of
>> the leader registration node
>> : -        // matches the version we expect - there is a setData call that
>> increments the parent's znode
>> : -        // version whenever a leader registers.
>> : -        log.info("Removing leader registration node on cancel: {} {}",
>> leaderPath, leaderZkNodeParentVersion);
>> : -        List<Op> ops = new ArrayList<>(2);
>> : -        ops.add(Op.check(new Path(leaderPath).getParent().toString(),
>> leaderZkNodeParentVersion));
>> : -        ops.add(Op.delete(leaderPath, -1));
>> : -        zkClient.multi(ops, true);
>> : -      } catch (KeeperException.NoNodeException nne) {
>> : -        // no problem
>> : -        log.info("No leader registration node found to remove: {}",
>> leaderPath);
>> : -      } catch (KeeperException.BadVersionException bve) {
>> : -        log.info("Cannot remove leader registration node because the
>> current registered node is not ours: {}", leaderPath);
>> : -        // no problem
>> : -      } catch (InterruptedException e) {
>> : -        throw e;
>> : -      } catch (Exception e) {
>> : -        SolrException.log(log, e);
>> : +    synchronized (lock) {
>> : +      if (leaderZkNodeParentVersion != null) {
>> : +        try {
>> : +          // We need to be careful and make sure we *only* delete our
>> own leader registration node.
>> : +          // We do this by using a multi and ensuring the parent znode
>> of the leader registration node
>> : +          // matches the version we expect - there is a setData call
>> that increments the parent's znode
>> : +          // version whenever a leader registers.
>> : +          log.info("Removing leader registration node on cancel: {}
>> {}", leaderPath, leaderZkNodeParentVersion);
>> : +          List<Op> ops = new ArrayList<>(2);
>> : +          ops.add(Op.check(new Path(leaderPath).getParent().toString(),
>> leaderZkNodeParentVersion));
>> : +          ops.add(Op.delete(leaderPath, -1));
>> : +          zkClient.multi(ops, true);
>> : +        } catch (KeeperException.NoNodeException nne) {
>> : +          // no problem
>> : +          log.info("No leader registration node found to remove: {}",
>> leaderPath);
>> : +        } catch (KeeperException.BadVersionException bve) {
>> : +          log.info("Cannot remove leader registration node because the
>> current registered node is not ours: {}", leaderPath);
>> : +          // no problem
>> : +        } catch (InterruptedException e) {
>> : +          throw e;
>> : +        } catch (Exception e) {
>> : +          SolrException.log(log, e);
>> : +        }
>> : +        leaderZkNodeParentVersion = null;
>> : +      } else {
>> : +        log.info("No version found for ephemeral leader parent node,
>> won't remove previous leader registration.");
>> :        }
>> : -      leaderZkNodeParentVersion = null;
>> : -    } else {
>> : -      log.info("No version found for ephemeral leader parent node,
>> won't remove previous leader registration.");
>> :      }
>> :    }
>> :
>> : @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> :
>> :          @Override
>> :          public void execute() throws InterruptedException,
>> KeeperException {
>> : -          log.info("Creating leader registration node {} after winning
>> as {}", leaderPath, leaderSeqPath);
>> : -          List<Op> ops = new ArrayList<>(2);
>> : -
>> : -          // We use a multi operation to get the parent nodes version,
>> which will
>> : -          // be used to make sure we only remove our own leader
>> registration node.
>> : -          // The setData call used to get the parent version is also
>> the trigger to
>> : -          // increment the version. We also do a sanity check that our
>> leaderSeqPath exists.
>> : -
>> : -          ops.add(Op.check(leaderSeqPath, -1));
>> : -          ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
>> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
>> CreateMode.EPHEMERAL));
>> : -          ops.add(Op.setData(parent, null, -1));
>> : -          List<OpResult> results;
>> : -
>> : -          results = zkClient.multi(ops, true);
>> : -
>> : -          for (OpResult result : results) {
>> : -            if (result.getType() == ZooDefs.OpCode.setData) {
>> : -              SetDataResult dresult = (SetDataResult) result;
>> : -              Stat stat = dresult.getStat();
>> : -              leaderZkNodeParentVersion = stat.getVersion();
>> : -              return;
>> : +          synchronized (lock) {
>> : +            log.info("Creating leader registration node {} after
>> winning as {}", leaderPath, leaderSeqPath);
>> : +            List<Op> ops = new ArrayList<>(2);
>> : +
>> : +            // We use a multi operation to get the parent nodes
>> version, which will
>> : +            // be used to make sure we only remove our own leader
>> registration node.
>> : +            // The setData call used to get the parent version is also
>> the trigger to
>> : +            // increment the version. We also do a sanity check that
>> our leaderSeqPath exists.
>> : +
>> : +            ops.add(Op.check(leaderSeqPath, -1));
>> : +            ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
>> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
>> CreateMode.EPHEMERAL));
>> : +            ops.add(Op.setData(parent, null, -1));
>> : +            List<OpResult> results;
>> : +
>> : +            results = zkClient.multi(ops, true);
>> : +            for (OpResult result : results) {
>> : +              if (result.getType() == ZooDefs.OpCode.setData) {
>> : +                SetDataResult dresult = (SetDataResult) result;
>> : +                Stat stat = dresult.getStat();
>> : +                leaderZkNodeParentVersion = stat.getVersion();
>> : +                return;
>> : +              }
>> :              }
>> : +            assert leaderZkNodeParentVersion != null;
>> :            }
>> : -          assert leaderZkNodeParentVersion != null;
>> :          }
>> :        });
>> :      } catch (Throwable t) {
>> : @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends
>> ElectionContext {
>> :
>> :    public LeaderElector getLeaderElector() {
>> :      return leaderElector;
>> : -  }
>> : +  }
>> : +
>> : +  Integer getLeaderZkNodeParentVersion() {
>> : +    synchronized (lock) {
>> : +      return leaderZkNodeParentVersion;
>> : +    }
>> : +  }
>> :  }
>> :
>> :  // add core container and stop passing core around...
>> :
>> :
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : ----------------------------------------------------------------------
>> : diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : index 4c826a7..aba2e59 100644
>> : --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
>> : @@ -2120,7 +2120,7 @@ public final class ZkController {
>> :      // we use this version and multi to ensure *only* the current zk
>> registered leader
>> :      // for a shard can put a replica into LIR
>> :
>> : -    Integer leaderZkNodeParentVersion =
>> ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
>> : +    Integer leaderZkNodeParentVersion =
>> ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
>> :
>> :      // TODO: should we do this optimistically to avoid races?
>> :      if (zkClient.exists(znodePath, retryOnConnLoss)) {
>> :
>> :
>> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : ----------------------------------------------------------------------
>> : diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : index 8ac0512..ea82cbf 100644
>> : --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
>> : @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
>> :      }
>> :
>> :      public void close() {
>> : +      for (ElectionContext ec : electionContext.values()) {
>> : +        try {
>> : +          ec.cancelElection();
>> : +        } catch (Exception e) {
>> : +          log.warn(String.format("Error cancelling election for %s",
>> ec.id), e);
>> : +        }
>> : +      }
>> :        deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
>> :        zkClient.close();
>> :      }
>> :
>> :
>>
>> -Hoss
>> http://www.lucidworks.com/
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
>> For additional commands, e-mail: dev-help@lucene.apache.org
>>
>



-- 
Regards,
Shalin Shekhar Mangar.

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Re: lucene-solr git commit: SOLR-8697: Add synchronization around registering as leader and canceling.

Posted by Scott Blum <dr...@gmail.com>.
That's annoying, since there's no semantic difference between that and a
log.warn(fmt, arg, arg).  Except that this particular logging API doesn't
have an overload that takes a throwable, a format string, and a set of args.

On Fri, Feb 26, 2016 at 8:02 PM, Chris Hostetter <ho...@fucit.org>
wrote:

>
> this breaks precommit...
>
>
> [forbidden-apis] Forbidden method invocation:
> java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses default
> locale]
> [forbidden-apis]   in org.apache.solr.cloud.OverseerTest$MockZKController
> (OverseerTest.java:128)
> [forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for
> forbidden API invocations (in 1.27s), 1 error(s).
>
>
>
>
> : Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
> : From: markrmiller@apache.org
> : Reply-To: dev@lucene.apache.org
> : To: commits@lucene.apache.org
> : Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
> :     registering as leader and canceling.
> :
> : Repository: lucene-solr
> : Updated Branches:
> :   refs/heads/master 0ed625b10 -> efb7bb171
> :
> :
> : SOLR-8697: Add synchronization around registering as leader and
> canceling.
> :
> :
> : Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
> : Commit:
> http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
> : Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
> : Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
> :
> : Branch: refs/heads/master
> : Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
> : Parents: 0ed625b
> : Author: markrmiller <ma...@apache.org>
> : Authored: Fri Feb 26 12:32:12 2016 -0500
> : Committer: markrmiller <ma...@apache.org>
> : Committed: Fri Feb 26 12:32:12 2016 -0500
> :
> : ----------------------------------------------------------------------
> :  .../org/apache/solr/cloud/ElectionContext.java  | 110
> ++++++++++---------
> :  .../org/apache/solr/cloud/ZkController.java     |   2 +-
> :  .../org/apache/solr/cloud/OverseerTest.java     |   7 ++
> :  3 files changed, 69 insertions(+), 50 deletions(-)
> : ----------------------------------------------------------------------
> :
> :
> :
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : ----------------------------------------------------------------------
> : diff --git
> a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : index da4b0c6..6743436 100644
> : --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
> : @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> :    protected String shardId;
> :    protected String collection;
> :    protected LeaderElector leaderElector;
> : -  protected volatile Integer leaderZkNodeParentVersion;
> : -
> : +  private Integer leaderZkNodeParentVersion;
> : +
> : +  // Prevents a race between cancelling and becoming leader.
> : +  private final Object lock = new Object();
> : +
> :    public ShardLeaderElectionContextBase(LeaderElector leaderElector,
> :        final String shardId, final String collection, final String
> coreNodeName,
> :        ZkNodeProps props, ZkStateReader zkStateReader) {
> : @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> :    @Override
> :    public void cancelElection() throws InterruptedException,
> KeeperException {
> :      super.cancelElection();
> : -    if (leaderZkNodeParentVersion != null) {
> : -      try {
> : -        // We need to be careful and make sure we *only* delete our own
> leader registration node.
> : -        // We do this by using a multi and ensuring the parent znode of
> the leader registration node
> : -        // matches the version we expect - there is a setData call that
> increments the parent's znode
> : -        // version whenever a leader registers.
> : -        log.info("Removing leader registration node on cancel: {} {}",
> leaderPath, leaderZkNodeParentVersion);
> : -        List<Op> ops = new ArrayList<>(2);
> : -        ops.add(Op.check(new Path(leaderPath).getParent().toString(),
> leaderZkNodeParentVersion));
> : -        ops.add(Op.delete(leaderPath, -1));
> : -        zkClient.multi(ops, true);
> : -      } catch (KeeperException.NoNodeException nne) {
> : -        // no problem
> : -        log.info("No leader registration node found to remove: {}",
> leaderPath);
> : -      } catch (KeeperException.BadVersionException bve) {
> : -        log.info("Cannot remove leader registration node because the
> current registered node is not ours: {}", leaderPath);
> : -        // no problem
> : -      } catch (InterruptedException e) {
> : -        throw e;
> : -      } catch (Exception e) {
> : -        SolrException.log(log, e);
> : +    synchronized (lock) {
> : +      if (leaderZkNodeParentVersion != null) {
> : +        try {
> : +          // We need to be careful and make sure we *only* delete our
> own leader registration node.
> : +          // We do this by using a multi and ensuring the parent znode
> of the leader registration node
> : +          // matches the version we expect - there is a setData call
> that increments the parent's znode
> : +          // version whenever a leader registers.
> : +          log.info("Removing leader registration node on cancel: {}
> {}", leaderPath, leaderZkNodeParentVersion);
> : +          List<Op> ops = new ArrayList<>(2);
> : +          ops.add(Op.check(new Path(leaderPath).getParent().toString(),
> leaderZkNodeParentVersion));
> : +          ops.add(Op.delete(leaderPath, -1));
> : +          zkClient.multi(ops, true);
> : +        } catch (KeeperException.NoNodeException nne) {
> : +          // no problem
> : +          log.info("No leader registration node found to remove: {}",
> leaderPath);
> : +        } catch (KeeperException.BadVersionException bve) {
> : +          log.info("Cannot remove leader registration node because the
> current registered node is not ours: {}", leaderPath);
> : +          // no problem
> : +        } catch (InterruptedException e) {
> : +          throw e;
> : +        } catch (Exception e) {
> : +          SolrException.log(log, e);
> : +        }
> : +        leaderZkNodeParentVersion = null;
> : +      } else {
> : +        log.info("No version found for ephemeral leader parent node,
> won't remove previous leader registration.");
> :        }
> : -      leaderZkNodeParentVersion = null;
> : -    } else {
> : -      log.info("No version found for ephemeral leader parent node,
> won't remove previous leader registration.");
> :      }
> :    }
> :
> : @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> :
> :          @Override
> :          public void execute() throws InterruptedException,
> KeeperException {
> : -          log.info("Creating leader registration node {} after winning
> as {}", leaderPath, leaderSeqPath);
> : -          List<Op> ops = new ArrayList<>(2);
> : -
> : -          // We use a multi operation to get the parent nodes version,
> which will
> : -          // be used to make sure we only remove our own leader
> registration node.
> : -          // The setData call used to get the parent version is also
> the trigger to
> : -          // increment the version. We also do a sanity check that our
> leaderSeqPath exists.
> : -
> : -          ops.add(Op.check(leaderSeqPath, -1));
> : -          ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> CreateMode.EPHEMERAL));
> : -          ops.add(Op.setData(parent, null, -1));
> : -          List<OpResult> results;
> : -
> : -          results = zkClient.multi(ops, true);
> : -
> : -          for (OpResult result : results) {
> : -            if (result.getType() == ZooDefs.OpCode.setData) {
> : -              SetDataResult dresult = (SetDataResult) result;
> : -              Stat stat = dresult.getStat();
> : -              leaderZkNodeParentVersion = stat.getVersion();
> : -              return;
> : +          synchronized (lock) {
> : +            log.info("Creating leader registration node {} after
> winning as {}", leaderPath, leaderSeqPath);
> : +            List<Op> ops = new ArrayList<>(2);
> : +
> : +            // We use a multi operation to get the parent nodes
> version, which will
> : +            // be used to make sure we only remove our own leader
> registration node.
> : +            // The setData call used to get the parent version is also
> the trigger to
> : +            // increment the version. We also do a sanity check that
> our leaderSeqPath exists.
> : +
> : +            ops.add(Op.check(leaderSeqPath, -1));
> : +            ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps),
> zkClient.getZkACLProvider().getACLsToAdd(leaderPath),
> CreateMode.EPHEMERAL));
> : +            ops.add(Op.setData(parent, null, -1));
> : +            List<OpResult> results;
> : +
> : +            results = zkClient.multi(ops, true);
> : +            for (OpResult result : results) {
> : +              if (result.getType() == ZooDefs.OpCode.setData) {
> : +                SetDataResult dresult = (SetDataResult) result;
> : +                Stat stat = dresult.getStat();
> : +                leaderZkNodeParentVersion = stat.getVersion();
> : +                return;
> : +              }
> :              }
> : +            assert leaderZkNodeParentVersion != null;
> :            }
> : -          assert leaderZkNodeParentVersion != null;
> :          }
> :        });
> :      } catch (Throwable t) {
> : @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends
> ElectionContext {
> :
> :    public LeaderElector getLeaderElector() {
> :      return leaderElector;
> : -  }
> : +  }
> : +
> : +  Integer getLeaderZkNodeParentVersion() {
> : +    synchronized (lock) {
> : +      return leaderZkNodeParentVersion;
> : +    }
> : +  }
> :  }
> :
> :  // add core container and stop passing core around...
> :
> :
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : ----------------------------------------------------------------------
> : diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : index 4c826a7..aba2e59 100644
> : --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
> : @@ -2120,7 +2120,7 @@ public final class ZkController {
> :      // we use this version and multi to ensure *only* the current zk
> registered leader
> :      // for a shard can put a replica into LIR
> :
> : -    Integer leaderZkNodeParentVersion =
> ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
> : +    Integer leaderZkNodeParentVersion =
> ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
> :
> :      // TODO: should we do this optimistically to avoid races?
> :      if (zkClient.exists(znodePath, retryOnConnLoss)) {
> :
> :
> http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : ----------------------------------------------------------------------
> : diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : index 8ac0512..ea82cbf 100644
> : --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
> : @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
> :      }
> :
> :      public void close() {
> : +      for (ElectionContext ec : electionContext.values()) {
> : +        try {
> : +          ec.cancelElection();
> : +        } catch (Exception e) {
> : +          log.warn(String.format("Error cancelling election for %s",
> ec.id), e);
> : +        }
> : +      }
> :        deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
> :        zkClient.close();
> :      }
> :
> :
>
> -Hoss
> http://www.lucidworks.com/
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
> For additional commands, e-mail: dev-help@lucene.apache.org
>
>

Re: lucene-solr git commit: SOLR-8697: Add synchronization around registering as leader and canceling.

Posted by Chris Hostetter <ho...@fucit.org>.
this breaks precommit...


[forbidden-apis] Forbidden method invocation: 
java.lang.String#format(java.lang.String,java.lang.Object[]) [Uses default 
locale]
[forbidden-apis]   in org.apache.solr.cloud.OverseerTest$MockZKController 
(OverseerTest.java:128)
[forbidden-apis] Scanned 3181 (and 1946 related) class file(s) for 
forbidden API invocations (in 1.27s), 1 error(s).




: Date: Fri, 26 Feb 2016 17:32:25 +0000 (UTC)
: From: markrmiller@apache.org
: Reply-To: dev@lucene.apache.org
: To: commits@lucene.apache.org
: Subject: lucene-solr git commit: SOLR-8697: Add synchronization around
:     registering as leader and canceling.
: 
: Repository: lucene-solr
: Updated Branches:
:   refs/heads/master 0ed625b10 -> efb7bb171
: 
: 
: SOLR-8697: Add synchronization around registering as leader and canceling.
: 
: 
: Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
: Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efb7bb17
: Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efb7bb17
: Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efb7bb17
: 
: Branch: refs/heads/master
: Commit: efb7bb171b22a3c6a00d30eefe935a0024df0c71
: Parents: 0ed625b
: Author: markrmiller <ma...@apache.org>
: Authored: Fri Feb 26 12:32:12 2016 -0500
: Committer: markrmiller <ma...@apache.org>
: Committed: Fri Feb 26 12:32:12 2016 -0500
: 
: ----------------------------------------------------------------------
:  .../org/apache/solr/cloud/ElectionContext.java  | 110 ++++++++++---------
:  .../org/apache/solr/cloud/ZkController.java     |   2 +-
:  .../org/apache/solr/cloud/OverseerTest.java     |   7 ++
:  3 files changed, 69 insertions(+), 50 deletions(-)
: ----------------------------------------------------------------------
: 
: 
: http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: ----------------------------------------------------------------------
: diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: index da4b0c6..6743436 100644
: --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
: @@ -110,8 +110,11 @@ class ShardLeaderElectionContextBase extends ElectionContext {
:    protected String shardId;
:    protected String collection;
:    protected LeaderElector leaderElector;
: -  protected volatile Integer leaderZkNodeParentVersion;
: -  
: +  private Integer leaderZkNodeParentVersion;
: +
: +  // Prevents a race between cancelling and becoming leader.
: +  private final Object lock = new Object();
: +
:    public ShardLeaderElectionContextBase(LeaderElector leaderElector,
:        final String shardId, final String collection, final String coreNodeName,
:        ZkNodeProps props, ZkStateReader zkStateReader) {
: @@ -138,31 +141,33 @@ class ShardLeaderElectionContextBase extends ElectionContext {
:    @Override
:    public void cancelElection() throws InterruptedException, KeeperException {
:      super.cancelElection();
: -    if (leaderZkNodeParentVersion != null) {
: -      try {
: -        // We need to be careful and make sure we *only* delete our own leader registration node.
: -        // We do this by using a multi and ensuring the parent znode of the leader registration node
: -        // matches the version we expect - there is a setData call that increments the parent's znode
: -        // version whenever a leader registers.
: -        log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
: -        List<Op> ops = new ArrayList<>(2);
: -        ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
: -        ops.add(Op.delete(leaderPath, -1));
: -        zkClient.multi(ops, true);
: -      } catch (KeeperException.NoNodeException nne) {
: -        // no problem
: -        log.info("No leader registration node found to remove: {}", leaderPath);
: -      } catch (KeeperException.BadVersionException bve) {
: -        log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
: -        // no problem
: -      } catch (InterruptedException e) {
: -        throw e;
: -      } catch (Exception e) {
: -        SolrException.log(log, e);
: +    synchronized (lock) {
: +      if (leaderZkNodeParentVersion != null) {
: +        try {
: +          // We need to be careful and make sure we *only* delete our own leader registration node.
: +          // We do this by using a multi and ensuring the parent znode of the leader registration node
: +          // matches the version we expect - there is a setData call that increments the parent's znode
: +          // version whenever a leader registers.
: +          log.info("Removing leader registration node on cancel: {} {}", leaderPath, leaderZkNodeParentVersion);
: +          List<Op> ops = new ArrayList<>(2);
: +          ops.add(Op.check(new Path(leaderPath).getParent().toString(), leaderZkNodeParentVersion));
: +          ops.add(Op.delete(leaderPath, -1));
: +          zkClient.multi(ops, true);
: +        } catch (KeeperException.NoNodeException nne) {
: +          // no problem
: +          log.info("No leader registration node found to remove: {}", leaderPath);
: +        } catch (KeeperException.BadVersionException bve) {
: +          log.info("Cannot remove leader registration node because the current registered node is not ours: {}", leaderPath);
: +          // no problem
: +        } catch (InterruptedException e) {
: +          throw e;
: +        } catch (Exception e) {
: +          SolrException.log(log, e);
: +        }
: +        leaderZkNodeParentVersion = null;
: +      } else {
: +        log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
:        }
: -      leaderZkNodeParentVersion = null;
: -    } else {
: -      log.info("No version found for ephemeral leader parent node, won't remove previous leader registration.");
:      }
:    }
:    
: @@ -179,30 +184,31 @@ class ShardLeaderElectionContextBase extends ElectionContext {
:          
:          @Override
:          public void execute() throws InterruptedException, KeeperException {
: -          log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
: -          List<Op> ops = new ArrayList<>(2);
: -          
: -          // We use a multi operation to get the parent nodes version, which will
: -          // be used to make sure we only remove our own leader registration node.
: -          // The setData call used to get the parent version is also the trigger to
: -          // increment the version. We also do a sanity check that our leaderSeqPath exists.
: -          
: -          ops.add(Op.check(leaderSeqPath, -1));
: -          ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
: -          ops.add(Op.setData(parent, null, -1));
: -          List<OpResult> results;
: -          
: -          results = zkClient.multi(ops, true);
: -          
: -          for (OpResult result : results) {
: -            if (result.getType() == ZooDefs.OpCode.setData) {
: -              SetDataResult dresult = (SetDataResult) result;
: -              Stat stat = dresult.getStat();
: -              leaderZkNodeParentVersion = stat.getVersion();
: -              return;
: +          synchronized (lock) {
: +            log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
: +            List<Op> ops = new ArrayList<>(2);
: +
: +            // We use a multi operation to get the parent nodes version, which will
: +            // be used to make sure we only remove our own leader registration node.
: +            // The setData call used to get the parent version is also the trigger to
: +            // increment the version. We also do a sanity check that our leaderSeqPath exists.
: +
: +            ops.add(Op.check(leaderSeqPath, -1));
: +            ops.add(Op.create(leaderPath, Utils.toJSON(leaderProps), zkClient.getZkACLProvider().getACLsToAdd(leaderPath), CreateMode.EPHEMERAL));
: +            ops.add(Op.setData(parent, null, -1));
: +            List<OpResult> results;
: +
: +            results = zkClient.multi(ops, true);
: +            for (OpResult result : results) {
: +              if (result.getType() == ZooDefs.OpCode.setData) {
: +                SetDataResult dresult = (SetDataResult) result;
: +                Stat stat = dresult.getStat();
: +                leaderZkNodeParentVersion = stat.getVersion();
: +                return;
: +              }
:              }
: +            assert leaderZkNodeParentVersion != null;
:            }
: -          assert leaderZkNodeParentVersion != null;
:          }
:        });
:      } catch (Throwable t) {
: @@ -225,7 +231,13 @@ class ShardLeaderElectionContextBase extends ElectionContext {
:  
:    public LeaderElector getLeaderElector() {
:      return leaderElector;
: -  }  
: +  }
: +
: +  Integer getLeaderZkNodeParentVersion() {
: +    synchronized (lock) {
: +      return leaderZkNodeParentVersion;
: +    }
: +  }
:  }
:  
:  // add core container and stop passing core around...
: 
: http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: ----------------------------------------------------------------------
: diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: index 4c826a7..aba2e59 100644
: --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
: @@ -2120,7 +2120,7 @@ public final class ZkController {
:      // we use this version and multi to ensure *only* the current zk registered leader
:      // for a shard can put a replica into LIR
:      
: -    Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).leaderZkNodeParentVersion;
: +    Integer leaderZkNodeParentVersion = ((ShardLeaderElectionContextBase)context).getLeaderZkNodeParentVersion();
:      
:      // TODO: should we do this optimistically to avoid races?
:      if (zkClient.exists(znodePath, retryOnConnLoss)) {
: 
: http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efb7bb17/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: ----------------------------------------------------------------------
: diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: index 8ac0512..ea82cbf 100644
: --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
: @@ -121,6 +121,13 @@ public class OverseerTest extends SolrTestCaseJ4 {
:      }
:  
:      public void close() {
: +      for (ElectionContext ec : electionContext.values()) {
: +        try {
: +          ec.cancelElection();
: +        } catch (Exception e) {
: +          log.warn(String.format("Error cancelling election for %s", ec.id), e);
: +        }
: +      }
:        deleteNode(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName);
:        zkClient.close();
:      }
: 
: 

-Hoss
http://www.lucidworks.com/

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org