You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/10/12 20:30:59 UTC

svn commit: r1397672 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/ solr/core/src/java/org/apache/solr/update/ solr/core/src/test/org/apache/solr/cloud/

Author: markrmiller
Date: Fri Oct 12 18:30:58 2012
New Revision: 1397672

URL: http://svn.apache.org/viewvc?rev=1397672&view=rev
Log:
SOLR-3939: Consider a sync attempt from leader to replica that fails due to 404 a success.
SOLR-3940: Rejoining the leader election incorrectly triggers the code path for a fresh cluster start rather than fail over.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/PeerSync.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java

Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Fri Oct 12 18:30:58 2012
@@ -38,9 +38,7 @@ New Features
   values of a multiValued field in their original order when highlighting.
   (Joel Bernstein via yonik)
 
-* SOLR-3929
-support configuring IndexWriter max thread count in solrconfig
-
+* SOLR-3929: Support configuring IndexWriter max thread count in solrconfig.
   (phunt via Mark Miller)
 
 Optimizations
@@ -69,6 +67,12 @@ Bug Fixes
 
 * SOLR-3917: Partial State on Schema-Browser UI is not defined for Dynamic
   Fields & Types (steffkes)
+  
+* SOLR-3939: Consider a sync attempt from leader to replica that fails due 
+  to 404 a success. (Mark Miller, Joel Bernstein)
+  
+* SOLR-3940: Rejoining the leader election incorrectly triggers the code path
+  for a fresh cluster start rather than fail over. (Mark Miller)
 
 Other Changes
 ----------------------

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Fri Oct 12 18:30:58 2012
@@ -324,7 +324,7 @@ final class ShardLeaderElectionContext e
       SolrException.log(log, "Error trying to start recovery", t);
     }
     
-    leaderElector.joinElection(this);
+    leaderElector.joinElection(this, true);
   }
 
   private boolean shouldIBeLeader(ZkNodeProps leaderProps, SolrCore core) {

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java Fri Oct 12 18:30:58 2012
@@ -18,7 +18,6 @@ package org.apache.solr.cloud;
  */
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -43,7 +42,7 @@ import org.slf4j.LoggerFactory;
  * Leader Election process. This class contains the logic by which a
  * leader is chosen. First call * {@link #setup(ElectionContext)} to ensure
  * the election process is init'd. Next call
- * {@link #joinElection(ElectionContext)} to start the leader election.
+ * {@link #joinElection(ElectionContext, boolean)} to start the leader election.
  * 
  * The implementation follows the classic ZooKeeper recipe of creating an
  * ephemeral, sequential node for each candidate and then looking at the set
@@ -203,7 +202,7 @@ public  class LeaderElector {
    * 
    * @return sequential node number
    */
-  public int joinElection(ElectionContext context) throws KeeperException, InterruptedException, IOException {
+  public int joinElection(ElectionContext context, boolean replacement) throws KeeperException, InterruptedException, IOException {
     final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;
     
     long sessionId = zkClient.getSolrZooKeeper().getSessionId();
@@ -259,7 +258,7 @@ public  class LeaderElector {
       }
     }
     int seq = getSeq(leaderSeqPath);
-    checkIfIamLeader(seq, context, false);
+    checkIfIamLeader(seq, context, replacement);
     
     return seq;
   }

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java Fri Oct 12 18:30:58 2012
@@ -191,7 +191,7 @@ public final class ZkController {
 
               ZkController.this.overseer = new Overseer(shardHandler, adminPath, zkStateReader);
               ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
-              overseerElector.joinElection(context);
+              overseerElector.joinElection(context, true);
               zkStateReader.createClusterStateWatchersAndUpdate();
 
             //  cc.newCmdDistribExecutor();
@@ -422,7 +422,7 @@ public final class ZkController {
       this.overseer = new Overseer(shardHandler, adminPath, zkStateReader);
       ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
       overseerElector.setup(context);
-      overseerElector.joinElection(context);
+      overseerElector.joinElection(context, false);
       zkStateReader.createClusterStateWatchersAndUpdate();
       
     } catch (IOException e) {
@@ -730,7 +730,7 @@ public final class ZkController {
 
     leaderElector.setup(context);
     electionContexts.put(coreZkNodeName, context);
-    leaderElector.joinElection(context);
+    leaderElector.joinElection(context, false);
   }
 
 

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/PeerSync.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/PeerSync.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/PeerSync.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/PeerSync.java Fri Oct 12 18:30:58 2012
@@ -312,6 +312,11 @@ public class PeerSync  {
         log.warn(msg() + " got a 503 from " + srsp.getShardAddress() + ", counting as success");
         return true;
       }
+      
+      if (cantReachIsSuccess && sreq.purpose == 1 && srsp.getException() instanceof SolrException && ((SolrException) srsp.getException()).code() == 404) {
+        log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success");
+        return true;
+      }
       // TODO: at least log???
       // srsp.getException().printStackTrace(System.out);
      

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java Fri Oct 12 18:30:58 2012
@@ -49,6 +49,7 @@ import org.apache.solr.client.solrj.requ
 import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
 import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.request.CoreAdminRequest.Create;
+import org.apache.solr.client.solrj.request.CoreAdminRequest.Unload;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.response.CoreAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
@@ -742,10 +743,10 @@ public class BasicDistributedZkTest exte
         0,
         ((HttpSolrServer) client).getBaseURL().length()
             - DEFAULT_COLLECTION.length() - 1);
-    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
-    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
-    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
-    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
+    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
+    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
+    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
+    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
     
    while (pending != null && pending.size() > 0) {
       
@@ -764,7 +765,7 @@ public class BasicDistributedZkTest exte
     
     assertAllActive(oneInstanceCollection2, solrj.getZkStateReader());
     
-    printLayout();
+    //printLayout();
     
    // TODO: enable when we don't falsely get slice1...
    // solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice1", 30000);
@@ -803,6 +804,27 @@ public class BasicDistributedZkTest exte
     assertNotNull(slices);
     String roles = slices.get("slice1").getReplicasMap().values().iterator().next().getStr(ZkStateReader.ROLES_PROP);
     assertEquals("none", roles);
+    
+    
+    ZkCoreNodeProps props = new ZkCoreNodeProps(solrj.getZkStateReader().getClusterState().getLeader(oneInstanceCollection2, "slice1"));
+    
+    // now test that unloading a core gets us a new leader
+    HttpSolrServer server = new HttpSolrServer(baseUrl);
+    Unload unloadCmd = new Unload(true);
+    unloadCmd.setCoreName(props.getCoreName());
+    
+    String leader = props.getCoreUrl();
+    
+    server.request(unloadCmd);
+    
+    int tries = 50;
+    while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "slice1", 10000))) {
+      Thread.sleep(100);
+      if (tries-- == 0) {
+        fail("Leader never changed");
+      }
+    }
+
   }
 
   private void testSearchByCollectionName() throws SolrServerException {
@@ -875,10 +897,10 @@ public class BasicDistributedZkTest exte
 
   private void createCollection(String collection,
       List<SolrServer> collectionClients, String baseUrl, int num) {
-    createCollection(collection, collectionClients, baseUrl, num, null);
+    createSolrCore(collection, collectionClients, baseUrl, num, null);
   }
   
-  private void createCollection(final String collection,
+  private void createSolrCore(final String collection,
       List<SolrServer> collectionClients, final String baseUrl, final int num,
       final String shardId) {
     Callable call = new Callable() {

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java Fri Oct 12 18:30:58 2012
@@ -40,7 +40,6 @@ import org.apache.zookeeper.KeeperExcept
 import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 
 @Slow
@@ -114,7 +113,7 @@ public class LeaderElectionTest extends 
           elector, "shard1", "collection1", Integer.toString(nodeNumber),
           props, zkStateReader);
       elector.setup(context);
-      seq = elector.joinElection(context);
+      seq = elector.joinElection(context, false);
       electionDone = true;
       seqToThread.put(seq, this);
     }
@@ -175,7 +174,7 @@ public class LeaderElectionTest extends 
     ElectionContext context = new ShardLeaderElectionContextBase(elector,
         "shard2", "collection1", "dummynode1", props, zkStateReader);
     elector.setup(context);
-    elector.joinElection(context);
+    elector.joinElection(context, false);
     assertEquals("http://127.0.0.1/solr/",
         getLeaderUrl("collection1", "shard2"));
   }
@@ -188,7 +187,7 @@ public class LeaderElectionTest extends 
     ElectionContext firstContext = new ShardLeaderElectionContextBase(first,
         "slice1", "collection2", "dummynode1", props, zkStateReader);
     first.setup(firstContext);
-    first.joinElection(firstContext);
+    first.joinElection(firstContext, false);
 
     Thread.sleep(1000);
     assertEquals("original leader was not registered", "http://127.0.0.1/solr/1/", getLeaderUrl("collection2", "slice1"));
@@ -199,7 +198,7 @@ public class LeaderElectionTest extends 
     ElectionContext context = new ShardLeaderElectionContextBase(second,
         "slice1", "collection2", "dummynode1", props, zkStateReader);
     second.setup(context);
-    second.joinElection(context);
+    second.joinElection(context, false);
     Thread.sleep(1000);
     assertEquals("original leader should have stayed leader", "http://127.0.0.1/solr/1/", getLeaderUrl("collection2", "slice1"));
     firstContext.cancelElection();

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java?rev=1397672&r1=1397671&r2=1397672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java Fri Oct 12 18:30:58 2012
@@ -139,7 +139,7 @@ public class OverseerTest extends SolrTe
           ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
               elector, shardId, collection, nodeName + "_" + coreName, props,
               zkStateReader);
-          elector.joinElection(ctx);
+          elector.joinElection(ctx, false);
           return shardId;
         }
         Thread.sleep(500);
@@ -876,7 +876,7 @@ public class OverseerTest extends SolrTe
         new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader);
     ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replaceAll("/", "_"));
     overseerElector.setup(ec);
-    overseerElector.joinElection(ec);
+    overseerElector.joinElection(ec, false);
     return zkClient;
   }