You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2014/03/02 01:28:24 UTC

svn commit: r1573243 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/ solr/core/src/test/org/apache/solr/cloud/ solr/solrj/ solr/solrj/src/java/org/apache/solr/common/util/

Author: markrmiller
Date: Sun Mar  2 00:28:23 2014
New Revision: 1573243

URL: http://svn.apache.org/r1573243
Log:
SOLR-5799: When registering as the leader, if an existing ephemeral registration exists, wait a short time to see if it goes away.

Added:
    lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java
      - copied, changed from r1573242, lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)

Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1573243&r1=1573242&r2=1573243&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Sun Mar  2 00:28:23 2014
@@ -83,6 +83,10 @@ Other Changes
 * SOLR-5771: Add SolrTestCaseJ4.SuppressSSL annotation to disable SSL (instead of static boolean).
   (Robert Muir)
 
+* SOLR-5799: When registering as the leader, if an existing ephemeral
+  registration exists, wait a short time to see if it goes away.
+  (Mark Miller)
+
 ==================  4.7.0 ==================
 
 Versions of Major Components

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1573243&r1=1573242&r2=1573243&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Sun Mar  2 00:28:23 2014
@@ -14,6 +14,8 @@ import org.apache.solr.common.cloud.ZkCm
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.RetryUtil;
+import org.apache.solr.common.util.RetryUtil.RetryCmd;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -22,6 +24,7 @@ import org.apache.solr.util.RefCounted;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NoNodeException;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,24 +82,28 @@ public abstract class ElectionContext {
 }
 
 class ShardLeaderElectionContextBase extends ElectionContext {
-  private static Logger log = LoggerFactory.getLogger(ShardLeaderElectionContextBase.class);
+  private static Logger log = LoggerFactory
+      .getLogger(ShardLeaderElectionContextBase.class);
   protected final SolrZkClient zkClient;
   protected String shardId;
   protected String collection;
   protected LeaderElector leaderElector;
-
-  public ShardLeaderElectionContextBase(LeaderElector leaderElector, final String shardId,
-      final String collection, final String coreNodeName, ZkNodeProps props, ZkStateReader zkStateReader) {
-    super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/"
-        + shardId, ZkStateReader.getShardLeadersPath(collection, shardId),
-        props, zkStateReader.getZkClient());
+  
+  public ShardLeaderElectionContextBase(LeaderElector leaderElector,
+      final String shardId, final String collection, final String coreNodeName,
+      ZkNodeProps props, ZkStateReader zkStateReader) {
+    super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection
+        + "/leader_elect/" + shardId, ZkStateReader.getShardLeadersPath(
+        collection, shardId), props, zkStateReader.getZkClient());
     this.leaderElector = leaderElector;
     this.zkClient = zkStateReader.getZkClient();
     this.shardId = shardId;
     this.collection = collection;
     
     try {
-      new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout()).ensureExists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, zkClient);
+      new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout())
+          .ensureExists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection,
+              zkClient);
     } catch (KeeperException e) {
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
     } catch (InterruptedException e) {
@@ -104,24 +111,39 @@ class ShardLeaderElectionContextBase ext
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
     }
   }
-
+  
   @Override
-  void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException,
-      InterruptedException, IOException {
+  void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart)
+      throws KeeperException, InterruptedException, IOException {
+    // register as leader - if an ephemeral is already there, wait just a bit
+    // to see if it goes away
+    RetryUtil.retryOnThrowable(NodeExistsException.class, 15000, 1000,
+        new RetryCmd() {
+          
+          @Override
+          public void execute() throws InterruptedException {
+            try {
+              zkClient.makePath(leaderPath, ZkStateReader.toJSON(leaderProps),
+                  CreateMode.EPHEMERAL, true);
+            } catch (KeeperException e) {
+              throw new SolrException(
+                  ErrorCode.SERVER_ERROR,
+                  "Could not register as the leader because creating the ephemeral registration node in ZooKeeper failed", e);
+            }
+          }
+        });
     
-    zkClient.makePath(leaderPath, ZkStateReader.toJSON(leaderProps),
-        CreateMode.EPHEMERAL, true);
     assert shardId != null;
-    ZkNodeProps m = ZkNodeProps.fromKeyVals(Overseer.QUEUE_OPERATION, ZkStateReader.LEADER_PROP,
-        ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP,
-        collection, ZkStateReader.BASE_URL_PROP, leaderProps.getProperties()
-            .get(ZkStateReader.BASE_URL_PROP), ZkStateReader.CORE_NAME_PROP,
+    ZkNodeProps m = ZkNodeProps.fromKeyVals(Overseer.QUEUE_OPERATION,
+        ZkStateReader.LEADER_PROP, ZkStateReader.SHARD_ID_PROP, shardId,
+        ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.BASE_URL_PROP,
+        leaderProps.getProperties().get(ZkStateReader.BASE_URL_PROP),
+        ZkStateReader.CORE_NAME_PROP,
         leaderProps.getProperties().get(ZkStateReader.CORE_NAME_PROP),
         ZkStateReader.STATE_PROP, ZkStateReader.ACTIVE);
     Overseer.getInQueue(zkClient).offer(ZkStateReader.toJSON(m));
-    
   }
-
+  
 }
 
 // add core container and stop passing core around...

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java?rev=1573243&r1=1573242&r2=1573243&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java Sun Mar  2 00:28:23 2014
@@ -564,8 +564,7 @@ public class OverseerTest extends SolrTe
       mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE,
           1);
       
-      while (version == getClusterStateVersion(controllerClient))
-        ;
+      while (version == getClusterStateVersion(controllerClient));
       
       verifyStatus(reader, ZkStateReader.ACTIVE);
       version = getClusterStateVersion(controllerClient);
@@ -593,9 +592,7 @@ public class OverseerTest extends SolrTe
       assertFalse("collection1 should be gone after publishing the null state",
           reader.getClusterState().getCollections().contains("collection1"));
     } finally {
-      
       close(mockController);
-      
       close(overseerClient);
       close(controllerClient);
       close(reader);

Copied: lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java (from r1573242, lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java?p2=lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java&p1=lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java&r1=1573242&r2=1573243&rev=1573243&view=diff
==============================================================================
    (empty)