You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by th...@apache.org on 2014/10/08 17:20:23 UTC
svn commit: r1630137 - in
/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr:
cloud/ZkController.java update/processor/DistributedUpdateProcessor.java
Author: thelabdude
Date: Wed Oct 8 15:20:23 2014
New Revision: 1630137
URL: http://svn.apache.org/r1630137
Log:
SOLR-6511: backport latest changes to branch_5x
Modified:
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1630137&r1=1630136&r2=1630137&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java Wed Oct 8 15:20:23 2014
@@ -30,6 +30,7 @@ import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -78,6 +79,8 @@ import org.apache.zookeeper.KeeperExcept
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.SessionExpiredException;
import org.apache.zookeeper.data.Stat;
+import org.noggit.JSONParser;
+import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1930,16 +1933,19 @@ public final class ZkController {
}
public String getLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName) {
-
+ Map<String,Object> stateObj = getLeaderInitiatedRecoveryStateObject(collection, shardId, coreNodeName);
+ return (stateObj != null) ? (String)stateObj.get("state") : null;
+ }
+
+ public Map<String,Object> getLeaderInitiatedRecoveryStateObject(String collection, String shardId, String coreNodeName) {
+
if (collection == null || shardId == null || coreNodeName == null)
return null; // if we don't have complete data about a core in cloud mode, return null
String znodePath = getLeaderInitiatedRecoveryZnodePath(collection, shardId, coreNodeName);
- String state = null;
+ byte[] stateData = null;
try {
- byte[] data = zkClient.getData(znodePath, null, new Stat(), false);
- if (data != null && data.length > 0)
- state = new String(data, "UTF-8");
+ stateData = zkClient.getData(znodePath, null, new Stat(), false);
} catch (NoNodeException ignoreMe) {
// safe to ignore as this znode will only exist if the leader initiated recovery
} catch (ConnectionLossException cle) {
@@ -1950,8 +1956,6 @@ public final class ZkController {
// sort of safe to ignore ??? Usually these are seen when the core is going down
// or there are bigger issues to deal with than reading this znode
log.warn("Unable to read "+znodePath+" due to: "+see);
- } catch (UnsupportedEncodingException e) {
- throw new Error("JVM Does not seem to support UTF-8", e);
} catch (Exception exc) {
log.error("Failed to read data from znode "+znodePath+" due to: "+exc);
if (exc instanceof SolrException) {
@@ -1961,7 +1965,12 @@ public final class ZkController {
"Failed to read data from znodePath: "+znodePath, exc);
}
}
- return state;
+
+ Map<String,Object> stateObj = null;
+ if (stateData != null && stateData.length > 0)
+ stateObj = (Map<String, Object>) ZkStateReader.fromJSON(stateData);
+
+ return stateObj;
}
private void updateLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName, String state) {
@@ -1982,14 +1991,22 @@ public final class ZkController {
}
return;
}
-
- byte[] znodeData = null;
+
+ Map<String,Object> stateObj = null;
try {
- znodeData = state.getBytes("UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new Error("JVM Does not seem to support UTF-8", e);
+ stateObj = getLeaderInitiatedRecoveryStateObject(collection, shardId, coreNodeName);
+ } catch (Exception exc) {
+ log.warn(exc.getMessage(), exc);
}
+ if (stateObj == null)
+ stateObj = new LinkedHashMap<String,Object>();
+
+ stateObj.put("state", state);
+ // only update the createdBy value if it's not set
+ if (stateObj.get("createdByNodeName") == null)
+ stateObj.put("createdByNodeName", String.valueOf(this.nodeName));
+ byte[] znodeData = ZkStateReader.toJSON(stateObj);
boolean retryOnConnLoss = true; // be a little more robust when trying to write data
try {
if (zkClient.exists(znodePath, retryOnConnLoss)) {
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1630137&r1=1630136&r2=1630137&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Wed Oct 8 15:20:23 2014
@@ -868,17 +868,16 @@ public class DistributedUpdateProcessor
if (sendRecoveryCommand) {
maxTries = 120;
} // else the node is no longer "live" so no need to send any recovery command
-
- } catch (KeeperException.SessionExpiredException see) {
- log.error("Leader failed to set replica " +
- error.req.node.getUrl() + " state to DOWN due to: " + see, see);
- // our session is expired, which means our state is suspect, so don't go
- // putting other replicas in recovery (see SOLR-6511)
- sendRecoveryCommand = false;
- } catch (Exception e) {
+ } catch (Exception exc) {
+ Throwable setLirZnodeFailedCause = SolrException.getRootCause(exc);
log.error("Leader failed to set replica " +
- error.req.node.getUrl() + " state to DOWN due to: " + e, e);
- // will go ahead and try to send the recovery command once after this error
+ error.req.node.getUrl() + " state to DOWN due to: " + setLirZnodeFailedCause, setLirZnodeFailedCause);
+ if (setLirZnodeFailedCause instanceof KeeperException.SessionExpiredException ||
+ setLirZnodeFailedCause instanceof KeeperException.ConnectionLossException) {
+ // our session is expired or the connection was lost, which means our state is suspect,
+ // so don't go putting other replicas in recovery (see SOLR-6511)
+ sendRecoveryCommand = false;
+ } // else will go ahead and try to send the recovery command once after this error
}
} else {
// not the leader anymore maybe or the error'd node is not my replica?