You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by th...@apache.org on 2014/10/08 17:20:23 UTC

svn commit: r1630137 - in /lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr: cloud/ZkController.java update/processor/DistributedUpdateProcessor.java

Author: thelabdude
Date: Wed Oct  8 15:20:23 2014
New Revision: 1630137

URL: http://svn.apache.org/r1630137
Log:
SOLR-6511: backport latest changes to branch_5x

Modified:
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1630137&r1=1630136&r2=1630137&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java Wed Oct  8 15:20:23 2014
@@ -30,6 +30,7 @@ import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -78,6 +79,8 @@ import org.apache.zookeeper.KeeperExcept
 import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.apache.zookeeper.KeeperException.SessionExpiredException;
 import org.apache.zookeeper.data.Stat;
+import org.noggit.JSONParser;
+import org.noggit.ObjectBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -1930,16 +1933,19 @@ public final class ZkController {
   }  
   
   public String getLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName) {
-    
+    Map<String,Object> stateObj = getLeaderInitiatedRecoveryStateObject(collection, shardId, coreNodeName);
+    return (stateObj != null) ? (String)stateObj.get("state") : null;
+  }
+
+  public Map<String,Object> getLeaderInitiatedRecoveryStateObject(String collection, String shardId, String coreNodeName) {
+
     if (collection == null || shardId == null || coreNodeName == null)
       return null; // if we don't have complete data about a core in cloud mode, return null
     
     String znodePath = getLeaderInitiatedRecoveryZnodePath(collection, shardId, coreNodeName);
-    String state = null;
+    byte[] stateData = null;
     try {
-      byte[] data = zkClient.getData(znodePath, null, new Stat(), false);
-      if (data != null && data.length > 0)
-        state = new String(data, "UTF-8");
+      stateData = zkClient.getData(znodePath, null, new Stat(), false);
     } catch (NoNodeException ignoreMe) {
       // safe to ignore as this znode will only exist if the leader initiated recovery
     } catch (ConnectionLossException cle) {
@@ -1950,8 +1956,6 @@ public final class ZkController {
       // sort of safe to ignore ??? Usually these are seen when the core is going down
       // or there are bigger issues to deal with than reading this znode
       log.warn("Unable to read "+znodePath+" due to: "+see);
-    } catch (UnsupportedEncodingException e) {
-      throw new Error("JVM Does not seem to support UTF-8", e);
     } catch (Exception exc) {
       log.error("Failed to read data from znode "+znodePath+" due to: "+exc);
       if (exc instanceof SolrException) {
@@ -1961,7 +1965,12 @@ public final class ZkController {
             "Failed to read data from znodePath: "+znodePath, exc);
       }
     }
-    return state;
+
+    Map<String,Object> stateObj = null;
+    if (stateData != null && stateData.length > 0)
+      stateObj = (Map<String, Object>) ZkStateReader.fromJSON(stateData);
+
+    return stateObj;
   }
   
   private void updateLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName, String state) {
@@ -1982,14 +1991,22 @@ public final class ZkController {
       }
       return;
     }
-    
-    byte[] znodeData = null;
+
+    Map<String,Object> stateObj = null;
     try {
-      znodeData = state.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new Error("JVM Does not seem to support UTF-8", e);
+      stateObj = getLeaderInitiatedRecoveryStateObject(collection, shardId, coreNodeName);
+    } catch (Exception exc) {
+      log.warn(exc.getMessage(), exc);
     }
+    if (stateObj == null)
+      stateObj = new LinkedHashMap<String,Object>();
+
+    stateObj.put("state", state);
+    // only update the createdByNodeName value if it's not already set
+    if (stateObj.get("createdByNodeName") == null)
+      stateObj.put("createdByNodeName", String.valueOf(this.nodeName));
 
+    byte[] znodeData = ZkStateReader.toJSON(stateObj);
     boolean retryOnConnLoss = true; // be a little more robust when trying to write data
     try {
       if (zkClient.exists(znodePath, retryOnConnLoss)) {

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1630137&r1=1630136&r2=1630137&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Wed Oct  8 15:20:23 2014
@@ -868,17 +868,16 @@ public class DistributedUpdateProcessor 
             if (sendRecoveryCommand) {
               maxTries = 120;
             } // else the node is no longer "live" so no need to send any recovery command
-
-          } catch (KeeperException.SessionExpiredException see) {
-            log.error("Leader failed to set replica " +
-                error.req.node.getUrl() + " state to DOWN due to: " + see, see);
-            // our session is expired, which means our state is suspect, so don't go
-            // putting other replicas in recovery (see SOLR-6511)
-            sendRecoveryCommand = false;
-          } catch (Exception e) {
+          } catch (Exception exc) {
+            Throwable setLirZnodeFailedCause = SolrException.getRootCause(exc);
             log.error("Leader failed to set replica " +
-                error.req.node.getUrl() + " state to DOWN due to: " + e, e);
-            // will go ahead and try to send the recovery command once after this error
+                error.req.node.getUrl() + " state to DOWN due to: " + setLirZnodeFailedCause, setLirZnodeFailedCause);
+            if (setLirZnodeFailedCause instanceof KeeperException.SessionExpiredException ||
+                setLirZnodeFailedCause instanceof KeeperException.ConnectionLossException) {
+              // our session is expired or our connection is lost, which means our state is suspect,
+              // so don't go putting other replicas in recovery (see SOLR-6511)
+              sendRecoveryCommand = false;
+            } // else will go ahead and try to send the recovery command once after this error
           }
         } else {
           // not the leader anymore maybe or the error'd node is not my replica?