You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2014/03/25 03:59:22 UTC

svn commit: r1581195 - in /lucene/dev/branches/lucene_solr_4_7: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/ solr/core/src/java/org/apache/solr/core/ solr/core/src/test/org/apache/solr/cloud/

Author: sarowe
Date: Tue Mar 25 02:59:22 2014
New Revision: 1581195

URL: http://svn.apache.org/r1581195
Log:
SOLR-5796: Increase how long we are willing to wait for a core to see the ZK advertised leader in its local state.
SOLR-5796: Make how long we are willing to wait for a core to see the ZK advertised leader in its local state configurable.
SOLR-5796: Fix illegal API call to format. (merged branch_4x revisions r1574641 and r1574682)

Modified:
    lucene/dev/branches/lucene_solr_4_7/   (props changed)
    lucene/dev/branches/lucene_solr_4_7/solr/   (props changed)
    lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_7/solr/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java
    lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java
    lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java
    lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java

Modified: lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt Tue Mar 25 02:59:22 2014
@@ -99,6 +99,16 @@ Bug Fixes
 * SOLR-5811: The Overseer will retry work items until success, which is a serious
   problem if you hit a bad work item. (Mark Miller)
 
+* SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
+  advertised leader in its local state. (Timothy Potter, Mark Miller)
+
+Other Changes
+---------------------
+
+* SOLR-5796: Make how long we are willing to wait for a core to see the ZK
+  advertised leader in its local state configurable.
+  (Timothy Potter via Mark Miller)
+
 ==================  4.7.0 ==================
 
 Versions of Major Components

Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java Tue Mar 25 02:59:22 2014
@@ -31,6 +31,7 @@ import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
@@ -162,16 +163,19 @@ public final class ZkController {
   protected volatile Overseer overseer;
 
   private int leaderVoteWait;
+  private int leaderConflictResolveWait;
   
   private boolean genericCoreNodeNames;
 
   private int clientTimeout;
 
   private volatile boolean isClosed;
-
+  
   public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort,
-      String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException,
-      TimeoutException, IOException {
+        String localHostContext, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) 
+      throws InterruptedException, TimeoutException, IOException
+  {
+
     if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
     this.cc = cc;
     this.genericCoreNodeNames = genericCoreNodeNames;
@@ -190,6 +194,8 @@ public final class ZkController {
                                      this.localHostContext);
 
     this.leaderVoteWait = leaderVoteWait;
+    this.leaderConflictResolveWait = leaderConflictResolveWait;
+    
     this.clientTimeout = zkClientTimeout;
     zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout,
         zkClientConnectTimeout, new DefaultConnectionStrategy(),
@@ -852,19 +858,28 @@ public final class ZkController {
           shardId, timeoutms * 2); // since we found it in zk, we are willing to
                                    // wait a while to find it in state
       int tries = 0;
+      final long msInSec = 1000L;
+      int maxTries = (int)Math.floor(leaderConflictResolveWait/msInSec);
       while (!leaderUrl.equals(clusterStateLeaderUrl)) {
-        if (tries == 60) {
+        if (tries > maxTries) {
           throw new SolrException(ErrorCode.SERVER_ERROR,
               "There is conflicting information about the leader of shard: "
                   + cloudDesc.getShardId() + " our state says:"
                   + clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl);
         }
-        Thread.sleep(1000);
+        Thread.sleep(msInSec);
         tries++;
         clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId,
             timeoutms);
         leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms)
             .getCoreUrl();
+        
+        if (tries % 30 == 0) {
+          String warnMsg = String.format(Locale.ENGLISH, "Still seeing conflicting information about the leader "
+              + "of shard %s for collection %s after %d seconds; our state says %s, but ZooKeeper says %s",
+              cloudDesc.getShardId(), collection, tries, clusterStateLeaderUrl, leaderUrl);
+          log.warn(warnMsg);
+        }
       }
       
     } catch (Exception e) {

Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java Tue Mar 25 02:59:22 2014
@@ -142,6 +142,7 @@ public abstract class ConfigSolr {
 
   private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000;
   private static final int DEFAULT_LEADER_VOTE_WAIT = 180000;  // 3 minutes
+  private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
   private static final int DEFAULT_CORE_LOAD_THREADS = 3;
 
   protected static final String DEFAULT_CORE_ADMIN_PATH = "/admin/cores";
@@ -161,6 +162,10 @@ public abstract class ConfigSolr {
   public int getLeaderVoteWait() {
     return getInt(CfgProp.SOLR_LEADERVOTEWAIT, DEFAULT_LEADER_VOTE_WAIT);
   }
+  
+  public int getLeaderConflictResolveWait() {
+    return getInt(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT);
+  }
 
   public boolean getGenericCoreNodeNames() {
     return getBool(CfgProp.SOLR_GENERICCORENODENAMES, false);
@@ -259,6 +264,7 @@ public abstract class ConfigSolr {
     SOLR_GENERICCORENODENAMES,
     SOLR_ZKCLIENTTIMEOUT,
     SOLR_ZKHOST,
+    SOLR_LEADERCONFLICTRESOLVEWAIT,
 
     //TODO: Remove all of these elements for 5.0
     SOLR_PERSISTENT,

Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java Tue Mar 25 02:59:22 2014
@@ -67,6 +67,7 @@ public class ConfigSolrXml extends Confi
     failIfFound("solr/cores/@hostContext");
     failIfFound("solr/cores/@hostPort");
     failIfFound("solr/cores/@leaderVoteWait");
+    failIfFound("solr/cores/@leaderConflictResolveWait");
     failIfFound("solr/cores/@genericCoreNodeNames");
     failIfFound("solr/cores/@managementPath");
     failIfFound("solr/cores/@shareSchema");
@@ -113,6 +114,7 @@ public class ConfigSolrXml extends Confi
     propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']"));
     propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']"));
     propMap.put(CfgProp.SOLR_LEADERVOTEWAIT, doSub("solr/solrcloud/int[@name='leaderVoteWait']"));
+    propMap.put(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, doSub("solr/solrcloud/int[@name='leaderConflictResolveWait']"));
     propMap.put(CfgProp.SOLR_GENERICCORENODENAMES, doSub("solr/solrcloud/bool[@name='genericCoreNodeNames']"));
     propMap.put(CfgProp.SOLR_MANAGEMENTPATH, doSub("solr/str[@name='managementPath']"));
     propMap.put(CfgProp.SOLR_SHAREDLIB, doSub("solr/str[@name='sharedLib']"));

Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java Tue Mar 25 02:59:22 2014
@@ -72,7 +72,7 @@ public class ZkContainer {
 
     initZooKeeper(cc, solrHome,
         config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(),
-        config.getHost(), config.getLeaderVoteWait(), config.getGenericCoreNodeNames());
+        config.getHost(), config.getLeaderVoteWait(), config.getLeaderConflictResolveWait(), config.getGenericCoreNodeNames());
   }
   // TODO: 5.0 remove this, it's only here for back-compat and only called from ConfigSolr.
   public static boolean isZkMode() {
@@ -84,7 +84,8 @@ public class ZkContainer {
   }
 
   public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort,
-                            String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames) {
+        String hostContext, String host, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames) {
+
     ZkController zkController = null;
     
     // if zkHost sys property is not set, we are not using ZooKeeper
@@ -156,7 +157,7 @@ public class ZkContainer {
         }
         zkController = new ZkController(cc, zookeeperHost, zkClientTimeout,
             zkClientConnectTimeout, host, hostPort, hostContext,
-            leaderVoteWait, genericCoreNodeNames,
+            leaderVoteWait, leaderConflictResolveWait, genericCoreNodeNames,
             new CurrentCoreDescriptorProvider() {
 
               @Override

Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java Tue Mar 25 02:59:22 2014
@@ -190,7 +190,7 @@ public class ZkControllerTest extends So
       cc = getCoreContainer();
       
       ZkController zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
-          "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+          "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
             
             @Override
             public List<CoreDescriptor> getCurrentDescriptors() {
@@ -230,7 +230,7 @@ public class ZkControllerTest extends So
       cc = getCoreContainer();
       
       zkController = new ZkController(cc, server.getZkAddress(),
-          TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+          TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
             
             @Override
             public List<CoreDescriptor> getCurrentDescriptors() {
@@ -284,7 +284,7 @@ public class ZkControllerTest extends So
 
       try {
         zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
-            "http://127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+            "http://127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
 
           @Override
           public List<CoreDescriptor> getCurrentDescriptors() {