You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2014/03/21 16:14:21 UTC
svn commit: r1579954 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/cloud/ZkController.java
core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
Author: shalin
Date: Fri Mar 21 15:14:20 2014
New Revision: 1579954
URL: http://svn.apache.org/r1579954
Log:
SOLR-5860: Use leaderConflictResolveWait in WaitForState during recovery/startup, improve logging and force refresh cluster state every 15 seconds
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1579954&r1=1579953&r2=1579954&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Mar 21 15:14:20 2014
@@ -139,6 +139,10 @@ New Features
* SOLR-5865: Provide a MiniSolrCloudCluster to enable easier testing.
(Greg Chanan via Mark Miller)
+* SOLR-5860: Use leaderConflictResolveWait in WaitForState during recovery/startup,
+ improve logging and force refresh cluster state every 15 seconds.
+ (Timothy Potter via shalin)
+
Bug Fixes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1579954&r1=1579953&r2=1579954&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java Fri Mar 21 15:14:20 2014
@@ -299,6 +299,10 @@ public final class ZkController {
public int getLeaderVoteWait() {
return leaderVoteWait;
}
+
+ public int getLeaderConflictResolveWait() {
+ return leaderConflictResolveWait;
+ }
public void forceOverSeer(){
try {
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1579954&r1=1579953&r2=1579954&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Fri Mar 21 15:14:20 2014
@@ -34,6 +34,7 @@ import org.apache.solr.common.SolrExcept
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
+import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
@@ -968,6 +969,7 @@ public class CoreAdminHandler extends Re
log.info("Going to wait for coreNodeName: " + coreNodeName + ", state: " + waitForState
+ ", checkLive: " + checkLive + ", onlyIfLeader: " + onlyIfLeader);
+ int maxTries = 0;
String state = null;
boolean live = false;
int retry = 0;
@@ -991,10 +993,25 @@ public class CoreAdminHandler extends Re
CloudDescriptor cloudDescriptor = core.getCoreDescriptor()
.getCloudDescriptor();
- if (retry == 15 || retry == 60) {
+ if (retry % 15 == 0) {
+ if (retry > 0 && log.isInfoEnabled())
+ log.info("After " + retry + " seconds, core " + cname + " (" +
+ cloudDescriptor.getShardId() + " of " +
+ cloudDescriptor.getCollectionName() + ") still does not have state: " +
+ waitForState + "; forcing ClusterState update from ZooKeeper");
+
// force a cluster state update
coreContainer.getZkController().getZkStateReader().updateClusterState(true);
}
+
+ if (maxTries == 0) {
+ // wait long enough for the leader conflict to work itself out plus a little extra
+ int conflictWaitMs = coreContainer.getZkController().getLeaderConflictResolveWait();
+ maxTries = (int) Math.round(conflictWaitMs / 1000) + 3;
+ log.info("Will wait a max of " + maxTries + " seconds to see " + cname + " (" +
+ cloudDescriptor.getShardId() + " of " +
+ cloudDescriptor.getCollectionName() + ") have state: " + waitForState);
+ }
ClusterState clusterState = coreContainer.getZkController()
.getClusterState();
@@ -1023,13 +1040,28 @@ public class CoreAdminHandler extends Re
}
}
}
-
- if (retry++ == 120) {
+
+ if (retry++ == maxTries) {
+ String collection = null;
+ String leaderInfo = null;
+ String shardId = null;
+ try {
+ CloudDescriptor cloudDescriptor =
+ core.getCoreDescriptor().getCloudDescriptor();
+ collection = cloudDescriptor.getCollectionName();
+ shardId = cloudDescriptor.getShardId();
+ leaderInfo = coreContainer.getZkController().
+ getZkStateReader().getLeaderUrl(collection, shardId, 0);
+ } catch (Exception exc) {
+ leaderInfo = "Not available due to: " + exc;
+ }
+
throw new SolrException(ErrorCode.BAD_REQUEST,
"I was asked to wait on state " + waitForState + " for "
- + nodeName
+ + shardId + " in " + collection + " on " + nodeName
+ " but I still do not see the requested state. I see state: "
- + state + " live:" + live);
+ + state + " live:" + live + " leader from ZK: " + leaderInfo
+ );
}
if (coreContainer.isShutDown()) {
@@ -1040,7 +1072,6 @@ public class CoreAdminHandler extends Re
// solrcloud_debug
if (log.isDebugEnabled()) {
try {
- ;
LocalSolrQueryRequest r = new LocalSolrQueryRequest(core,
new ModifiableSolrParams());
CommitUpdateCommand commitCmd = new CommitUpdateCommand(r, false);