You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2014/03/25 03:59:22 UTC
svn commit: r1581195 - in /lucene/dev/branches/lucene_solr_4_7: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/cloud/
solr/core/src/java/org/apache/solr/core/
solr/core/src/test/org/apache/solr/cloud/
Author: sarowe
Date: Tue Mar 25 02:59:22 2014
New Revision: 1581195
URL: http://svn.apache.org/r1581195
Log:
SOLR-5796: Increase how long we are willing to wait for a core to see the ZK advertised leader in its local state.
SOLR-5796: Make how long we are willing to wait for a core to see the ZK advertised leader in its local state configurable.
SOLR-5796: Fix illegal API call to format. (merged branch_4x revisions r1574641 and r1574682)
Modified:
lucene/dev/branches/lucene_solr_4_7/ (props changed)
lucene/dev/branches/lucene_solr_4_7/solr/ (props changed)
lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene_solr_4_7/solr/core/ (props changed)
lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java
lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java
lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java
lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
Modified: lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/CHANGES.txt Tue Mar 25 02:59:22 2014
@@ -99,6 +99,16 @@ Bug Fixes
* SOLR-5811: The Overseer will retry work items until success, which is a serious
problem if you hit a bad work item. (Mark Miller)
+* SOLR-5796: Increase how long we are willing to wait for a core to see the ZK
+ advertised leader in it's local state. (Timothy Potter, Mark Miller)
+
+Other Changes
+---------------------
+
+* SOLR-5796: Make how long we are willing to wait for a core to see the ZK
+ advertised leader in it's local state configurable.
+ (Timothy Potter via Mark Miller)
+
================== 4.7.0 ==================
Versions of Major Components
Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/cloud/ZkController.java Tue Mar 25 02:59:22 2014
@@ -31,6 +31,7 @@ import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@@ -162,16 +163,19 @@ public final class ZkController {
protected volatile Overseer overseer;
private int leaderVoteWait;
+ private int leaderConflictResolveWait;
private boolean genericCoreNodeNames;
private int clientTimeout;
private volatile boolean isClosed;
-
+
public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort,
- String localHostContext, int leaderVoteWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException,
- TimeoutException, IOException {
+ String localHostContext, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect)
+ throws InterruptedException, TimeoutException, IOException
+ {
+
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
this.cc = cc;
this.genericCoreNodeNames = genericCoreNodeNames;
@@ -190,6 +194,8 @@ public final class ZkController {
this.localHostContext);
this.leaderVoteWait = leaderVoteWait;
+ this.leaderConflictResolveWait = leaderConflictResolveWait;
+
this.clientTimeout = zkClientTimeout;
zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout,
zkClientConnectTimeout, new DefaultConnectionStrategy(),
@@ -852,19 +858,28 @@ public final class ZkController {
shardId, timeoutms * 2); // since we found it in zk, we are willing to
// wait a while to find it in state
int tries = 0;
+ final long msInSec = 1000L;
+ int maxTries = (int)Math.floor(leaderConflictResolveWait/msInSec);
while (!leaderUrl.equals(clusterStateLeaderUrl)) {
- if (tries == 60) {
+ if (tries > maxTries) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"There is conflicting information about the leader of shard: "
+ cloudDesc.getShardId() + " our state says:"
+ clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl);
}
- Thread.sleep(1000);
+ Thread.sleep(msInSec);
tries++;
clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId,
timeoutms);
leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms)
.getCoreUrl();
+
+ if (tries % 30 == 0) {
+ String warnMsg = String.format(Locale.ENGLISH, "Still seeing conflicting information about the leader "
+ + "of shard %s for collection %s after %d seconds; our state says %s, but ZooKeeper says %s",
+ cloudDesc.getShardId(), collection, tries, clusterStateLeaderUrl, leaderUrl);
+ log.warn(warnMsg);
+ }
}
} catch (Exception e) {
Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolr.java Tue Mar 25 02:59:22 2014
@@ -142,6 +142,7 @@ public abstract class ConfigSolr {
private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000;
private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes
+ private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
private static final int DEFAULT_CORE_LOAD_THREADS = 3;
protected static final String DEFAULT_CORE_ADMIN_PATH = "/admin/cores";
@@ -161,6 +162,10 @@ public abstract class ConfigSolr {
public int getLeaderVoteWait() {
return getInt(CfgProp.SOLR_LEADERVOTEWAIT, DEFAULT_LEADER_VOTE_WAIT);
}
+
+ public int getLeaderConflictResolveWait() {
+ return getInt(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT);
+ }
public boolean getGenericCoreNodeNames() {
return getBool(CfgProp.SOLR_GENERICCORENODENAMES, false);
@@ -259,6 +264,7 @@ public abstract class ConfigSolr {
SOLR_GENERICCORENODENAMES,
SOLR_ZKCLIENTTIMEOUT,
SOLR_ZKHOST,
+ SOLR_LEADERCONFLICTRESOLVEWAIT,
//TODO: Remove all of these elements for 5.0
SOLR_PERSISTENT,
Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ConfigSolrXml.java Tue Mar 25 02:59:22 2014
@@ -67,6 +67,7 @@ public class ConfigSolrXml extends Confi
failIfFound("solr/cores/@hostContext");
failIfFound("solr/cores/@hostPort");
failIfFound("solr/cores/@leaderVoteWait");
+ failIfFound("solr/cores/@leaderConflictResolveWait");
failIfFound("solr/cores/@genericCoreNodeNames");
failIfFound("solr/cores/@managementPath");
failIfFound("solr/cores/@shareSchema");
@@ -113,6 +114,7 @@ public class ConfigSolrXml extends Confi
propMap.put(CfgProp.SOLR_HOSTCONTEXT, doSub("solr/solrcloud/str[@name='hostContext']"));
propMap.put(CfgProp.SOLR_HOSTPORT, doSub("solr/solrcloud/int[@name='hostPort']"));
propMap.put(CfgProp.SOLR_LEADERVOTEWAIT, doSub("solr/solrcloud/int[@name='leaderVoteWait']"));
+ propMap.put(CfgProp.SOLR_LEADERCONFLICTRESOLVEWAIT, doSub("solr/solrcloud/int[@name='leaderConflictResolveWait']"));
propMap.put(CfgProp.SOLR_GENERICCORENODENAMES, doSub("solr/solrcloud/bool[@name='genericCoreNodeNames']"));
propMap.put(CfgProp.SOLR_MANAGEMENTPATH, doSub("solr/str[@name='managementPath']"));
propMap.put(CfgProp.SOLR_SHAREDLIB, doSub("solr/str[@name='sharedLib']"));
Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/java/org/apache/solr/core/ZkContainer.java Tue Mar 25 02:59:22 2014
@@ -72,7 +72,7 @@ public class ZkContainer {
initZooKeeper(cc, solrHome,
config.getZkHost(), config.getZkClientTimeout(), config.getZkHostPort(), config.getZkHostContext(),
- config.getHost(), config.getLeaderVoteWait(), config.getGenericCoreNodeNames());
+ config.getHost(), config.getLeaderVoteWait(), config.getLeaderConflictResolveWait(), config.getGenericCoreNodeNames());
}
// TODO: 5.0 remove this, it's only here for back-compat and only called from ConfigSolr.
public static boolean isZkMode() {
@@ -84,7 +84,8 @@ public class ZkContainer {
}
public void initZooKeeper(final CoreContainer cc, String solrHome, String zkHost, int zkClientTimeout, String hostPort,
- String hostContext, String host, int leaderVoteWait, boolean genericCoreNodeNames) {
+ String hostContext, String host, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames) {
+
ZkController zkController = null;
// if zkHost sys property is not set, we are not using ZooKeeper
@@ -156,7 +157,7 @@ public class ZkContainer {
}
zkController = new ZkController(cc, zookeeperHost, zkClientTimeout,
zkClientConnectTimeout, host, hostPort, hostContext,
- leaderVoteWait, genericCoreNodeNames,
+ leaderVoteWait, leaderConflictResolveWait, genericCoreNodeNames,
new CurrentCoreDescriptorProvider() {
@Override
Modified: lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java?rev=1581195&r1=1581194&r2=1581195&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java Tue Mar 25 02:59:22 2014
@@ -190,7 +190,7 @@ public class ZkControllerTest extends So
cc = getCoreContainer();
ZkController zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
- "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+ "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
@Override
public List<CoreDescriptor> getCurrentDescriptors() {
@@ -230,7 +230,7 @@ public class ZkControllerTest extends So
cc = getCoreContainer();
zkController = new ZkController(cc, server.getZkAddress(),
- TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+ TIMEOUT, 10000, "127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
@Override
public List<CoreDescriptor> getCurrentDescriptors() {
@@ -284,7 +284,7 @@ public class ZkControllerTest extends So
try {
zkController = new ZkController(cc, server.getZkAddress(), TIMEOUT, 10000,
- "http://127.0.0.1", "8983", "solr", 0, true, new CurrentCoreDescriptorProvider() {
+ "http://127.0.0.1", "8983", "solr", 0, 60000, true, new CurrentCoreDescriptorProvider() {
@Override
public List<CoreDescriptor> getCurrentDescriptors() {