You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2017/07/20 07:40:51 UTC

lucene-solr:branch_7x: SOLR-11124: MoveReplicaCmd should skip deleting the old replica if its node is not live

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 8e3cbb256 -> 18878811f


SOLR-11124: MoveReplicaCmd should skip deleting the old replica if its node is not live


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/18878811
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/18878811
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/18878811

Branch: refs/heads/branch_7x
Commit: 18878811f85ed1fd0b5fcae57e713f584e305bb5
Parents: 8e3cbb2
Author: Cao Manh Dat <da...@apache.org>
Authored: Thu Jul 20 14:39:30 2017 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Thu Jul 20 14:40:39 2017 +0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../org/apache/solr/cloud/MoveReplicaCmd.java   | 62 +++++++++++---------
 2 files changed, 36 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/18878811/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 80bc7b8..0e61531 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -62,6 +62,8 @@ Optimizations
 * SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging.
   (Michael Braun via Christine Poerschke)
 
+* SOLR-11124: MoveReplicaCmd should skip deleting the old replica if its node is not live (Cao Manh Dat)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/18878811/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index 8c4e9d7..3f29fc5 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -119,36 +119,41 @@ public class MoveReplicaCmd implements Cmd{
 
   private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async,
                                  DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception {
-    ZkNodeProps removeReplicasProps = new ZkNodeProps(
-        COLLECTION_PROP, coll.getName(),
-        SHARD_ID_PROP, slice.getName(),
-        REPLICA_PROP, replica.getName()
-        );
-    removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
-    removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
-    if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
-    NamedList deleteResult = new NamedList();
-    ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
-    if (deleteResult.get("failure") != null) {
-      String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
-          coll.getName(), slice.getName(), replica.getName());
-      log.warn(errorString);
-      results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
-      return;
-    }
+    String skipCreateReplicaInClusterState = "true";
+    if (clusterState.getLiveNodes().contains(replica.getNodeName())) {
+      skipCreateReplicaInClusterState = "false";
+      ZkNodeProps removeReplicasProps = new ZkNodeProps(
+          COLLECTION_PROP, coll.getName(),
+          SHARD_ID_PROP, slice.getName(),
+          REPLICA_PROP, replica.getName()
+      );
+      removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
+      removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
+      if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
+      NamedList deleteResult = new NamedList();
+      ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
+      if (deleteResult.get("failure") != null) {
+        String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
+            coll.getName(), slice.getName(), replica.getName());
+        log.warn(errorString);
+        results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
+        return;
+      }
 
-    TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
-    while (!timeOut.hasTimedOut()) {
-      coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
-      if (coll.getReplica(replica.getName()) != null) {
-        Thread.sleep(100);
-      } else {
-        break;
+      TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
+      while (!timeOut.hasTimedOut()) {
+        coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
+        if (coll.getReplica(replica.getName()) != null) {
+          Thread.sleep(100);
+        } else {
+          break;
+        }
       }
-    }
-    if (timeOut.hasTimedOut()) {
-      results.add("failure", "Still see deleted replica in clusterstate!");
-      return;
+      if (timeOut.hasTimedOut()) {
+        results.add("failure", "Still see deleted replica in clusterstate!");
+        return;
+      }
+
     }
 
     String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR);
@@ -158,6 +163,7 @@ public class MoveReplicaCmd implements Cmd{
         CoreAdminParams.NODE, targetNode,
         CoreAdminParams.CORE_NODE_NAME, replica.getName(),
         CoreAdminParams.NAME, replica.getCoreName(),
+        SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, skipCreateReplicaInClusterState,
         CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
         CoreAdminParams.DATA_DIR, dataDir);
     if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);