You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/12/05 13:46:17 UTC

lucene-solr:master: SOLR-11458: Improve error handling in MoveReplicaCmd to avoid potential loss of data.

Repository: lucene-solr
Updated Branches:
  refs/heads/master 9268b2b30 -> 41644bdcd


SOLR-11458: Improve error handling in MoveReplicaCmd to avoid potential loss of data.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/41644bdc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/41644bdc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/41644bdc

Branch: refs/heads/master
Commit: 41644bdcdcc0734115ce08ec24d6b408e1f8cf28
Parents: 9268b2b
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Tue Dec 5 14:00:56 2017 +0100
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Tue Dec 5 14:46:05 2017 +0100

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../org/apache/solr/cloud/ActionThrottle.java   |  4 +
 .../src/java/org/apache/solr/cloud/Assign.java  |  3 +-
 .../org/apache/solr/cloud/MoveReplicaCmd.java   | 85 +++++++++++++++-----
 .../cloud/OverseerCollectionMessageHandler.java |  2 +-
 .../org/apache/solr/cloud/ZkController.java     |  1 +
 .../cloud/autoscaling/ScheduledTriggers.java    | 31 ++++---
 .../apache/solr/core/HdfsDirectoryFactory.java  | 12 ++-
 .../solr/handler/admin/CollectionsHandler.java  |  2 +
 .../cloud/CollectionTooManyReplicasTest.java    |  8 +-
 .../apache/solr/cloud/MoveReplicaHDFSTest.java  | 13 +++
 .../org/apache/solr/cloud/MoveReplicaTest.java  | 38 ++++++---
 .../autoscaling/TriggerIntegrationTest.java     | 40 ++++++---
 .../solrj/request/CollectionAdminRequest.java   | 14 ++++
 .../apache/solr/common/cloud/ZkStateReader.java |  1 +
 .../solr/common/params/CommonAdminParams.java   |  7 +-
 .../collections.collection.Commands.json        | 24 +++++-
 17 files changed, 220 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 456363f..5a8c2b2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -168,6 +168,8 @@ Bug Fixes
 * SOLR-9137: bin/solr script ignored custom STOP_PORT on shutdown.
   (Joachim Kohlhammer, Steve Rowe, Christine Poerschke)
 
+* SOLR-11458: Improve error handling in MoveReplicaCmd to avoid potential loss of data. (ab)
+
 Optimizations
 ----------------------
 * SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/cloud/ActionThrottle.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ActionThrottle.java b/solr/core/src/java/org/apache/solr/cloud/ActionThrottle.java
index 9476c3c..f60332c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ActionThrottle.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ActionThrottle.java
@@ -52,6 +52,10 @@ public class ActionThrottle {
     this.lastActionStartedAt = lastActionStartedAt;
     this.timeSource = TimeSource.NANO_TIME;
   }
+
+  public void reset() {
+    lastActionStartedAt = null;
+  }
   
   public void markAttemptingAction() {
     lastActionStartedAt = timeSource.getTime();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/cloud/Assign.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/Assign.java b/solr/core/src/java/org/apache/solr/cloud/Assign.java
index 1b48881..9967b23 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Assign.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Assign.java
@@ -457,7 +457,8 @@ public class Assign {
     if (createNodeList != null) { // Overrides petty considerations about maxShardsPerNode
       if (createNodeList.size() != nodeNameVsShardCount.size()) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            "At least one of the node(s) specified are not currently active, no action taken.");
+            "At least one of the node(s) specified " + createNodeList + " are not currently active in "
+                + nodeNameVsShardCount.keySet() + ", no action taken.");
       }
       return nodeNameVsShardCount;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index a2ed407..71d5c82 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -31,6 +31,7 @@ import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.util.NamedList;
@@ -45,6 +46,8 @@ import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
+import static org.apache.solr.common.params.CommonAdminParams.TIMEOUT;
 import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
 
 public class MoveReplicaCmd implements Cmd{
@@ -63,11 +66,12 @@ public class MoveReplicaCmd implements Cmd{
 
   private void moveReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
     log.debug("moveReplica() : {}", Utils.toJSONString(message));
-    ocmh.checkRequired(message, COLLECTION_PROP, "targetNode");
+    ocmh.checkRequired(message, COLLECTION_PROP, CollectionParams.TARGET_NODE);
     String collection = message.getStr(COLLECTION_PROP);
-    String targetNode = message.getStr("targetNode");
+    String targetNode = message.getStr(CollectionParams.TARGET_NODE);
     boolean waitForFinalState = message.getBool(WAIT_FOR_FINAL_STATE, false);
-    int timeout = message.getInt("timeout", 10 * 60); // 10 minutes
+    boolean inPlaceMove = message.getBool(IN_PLACE_MOVE, true);
+    int timeout = message.getInt(TIMEOUT, 10 * 60); // 10 minutes
 
     String async = message.getStr(ASYNC);
 
@@ -75,6 +79,9 @@ public class MoveReplicaCmd implements Cmd{
     if (coll == null) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
     }
+    if (!clusterState.getLiveNodes().contains(targetNode)) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target node: " + targetNode + " not in live nodes: " + clusterState.getLiveNodes());
+    }
     Replica replica = null;
     if (message.containsKey(REPLICA_PROP)) {
       String replicaName = message.getStr(REPLICA_PROP);
@@ -86,12 +93,17 @@ public class MoveReplicaCmd implements Cmd{
     } else {
       String sourceNode = message.getStr(CollectionParams.SOURCE_NODE, message.getStr(CollectionParams.FROM_NODE));
       if (sourceNode == null) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "sourceNode is a required param" );
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'" + CollectionParams.SOURCE_NODE +
+            " or '" + CollectionParams.FROM_NODE + "' is a required param");
       }
       String shardId = message.getStr(SHARD_ID_PROP);
+      if (shardId == null) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'" + SHARD_ID_PROP + "' is a required param");
+      }
       Slice slice = clusterState.getCollection(collection).getSlice(shardId);
       List<Replica> sliceReplicas = new ArrayList<>(slice.getReplicas());
       Collections.shuffle(sliceReplicas, RANDOM);
+      // this picks up a single random replica from the sourceNode
       for (Replica r : slice.getReplicas()) {
         if (r.getNodeName().equals(sourceNode)) {
           replica = r;
@@ -99,11 +111,11 @@ public class MoveReplicaCmd implements Cmd{
       }
       if (replica == null) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            "Collection: " + collection + " node: " + sourceNode + " do not have any replica belong to shard: " + shardId);
+            "Collection: " + collection + " node: " + sourceNode + " does not have any replica belonging to shard: " + shardId);
       }
     }
 
-    log.info("Replica will be moved {}", replica);
+    log.info("Replica will be moved to node {}: {}", targetNode, replica);
     Slice slice = null;
     for (Slice s : coll.getSlices()) {
       if (s.getReplicas().contains(replica)) {
@@ -112,9 +124,13 @@ public class MoveReplicaCmd implements Cmd{
     }
     assert slice != null;
     Object dataDir = replica.get("dataDir");
-    if (dataDir != null && dataDir.toString().startsWith("hdfs:/")) {
+    boolean isSharedFS = replica.getBool(ZkStateReader.SHARED_STORAGE_PROP, false) && dataDir != null;
+
+    if (isSharedFS && inPlaceMove) {
+      log.debug("-- moveHdfsReplica");
       moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll, replica, slice, timeout, waitForFinalState);
     } else {
+      log.debug("-- moveNormalReplica (inPlaceMove=" + inPlaceMove + ", isSharedFS=" + isSharedFS);
       moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice, timeout, waitForFinalState);
     }
   }
@@ -135,10 +151,10 @@ public class MoveReplicaCmd implements Cmd{
       NamedList deleteResult = new NamedList();
       ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
       if (deleteResult.get("failure") != null) {
-        String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
-            coll.getName(), slice.getName(), replica.getName());
+        String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
+            coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
         log.warn(errorString);
-        results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
+        results.add("failure", errorString);
         return;
       }
 
@@ -165,17 +181,48 @@ public class MoveReplicaCmd implements Cmd{
         CoreAdminParams.NODE, targetNode,
         CoreAdminParams.CORE_NODE_NAME, replica.getName(),
         CoreAdminParams.NAME, replica.getCoreName(),
+        WAIT_FOR_FINAL_STATE, String.valueOf(waitForFinalState),
         SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, skipCreateReplicaInClusterState,
         CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
         CoreAdminParams.DATA_DIR, dataDir);
     if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
     NamedList addResult = new NamedList();
-    ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, addResult, null);
+    try {
+      ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, addResult, null);
+    } catch (Exception e) {
+      // fatal error - try rolling back
+      String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
+          " on node=%s, failure=%s", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
+      results.add("failure", errorString);
+      log.warn("Error adding replica " + addReplicasProps + " - trying to roll back...", e);
+      addReplicasProps = addReplicasProps.plus(CoreAdminParams.NODE, replica.getNodeName());
+      NamedList rollback = new NamedList();
+      ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, rollback, null);
+      if (rollback.get("failure") != null) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Fatal error during MOVEREPLICA of " + replica
+            + ", collection may be inconsistent: " + rollback.get("failure"));
+      }
+      return;
+    }
     if (addResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
-          " on node=%s", coll.getName(), slice.getName(), targetNode);
+          " on node=%s, failure=%s", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
       log.warn(errorString);
       results.add("failure", errorString);
+      log.debug("--- trying to roll back...");
+      // try to roll back
+      addReplicasProps = addReplicasProps.plus(CoreAdminParams.NODE, replica.getNodeName());
+      NamedList rollback = new NamedList();
+      try {
+        ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, rollback, null);
+      } catch (Exception e) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Fatal error during MOVEREPLICA of " + replica
+            + ", collection may be inconsistent!", e);
+      }
+      if (rollback.get("failure") != null) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Fatal error during MOVEREPLICA of " + replica
+            + ", collection may be inconsistent! Failure: " + rollback.get("failure"));
+      }
       return;
     } else {
       String successString = String.format(Locale.ROOT, "MOVEREPLICA action completed successfully, moved replica=%s at node=%s " +
@@ -192,20 +239,20 @@ public class MoveReplicaCmd implements Cmd{
         SHARD_ID_PROP, slice.getName(),
         CoreAdminParams.NODE, targetNode,
         CoreAdminParams.NAME, newCoreName);
-    if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
+    if (async != null) addReplicasProps.getProperties().put(ASYNC, async);
     NamedList addResult = new NamedList();
     SolrCloseableLatch countDownLatch = new SolrCloseableLatch(1, ocmh);
     ActiveReplicaWatcher watcher = null;
     ZkNodeProps props = ocmh.addReplica(clusterState, addReplicasProps, addResult, null);
-    log.info("props " + props);
+    log.debug("props " + props);
     if (replica.equals(slice.getLeader()) || waitForFinalState) {
       watcher = new ActiveReplicaWatcher(coll.getName(), null, Collections.singletonList(newCoreName), countDownLatch);
-      log.info("-- registered watcher " + watcher);
+      log.debug("-- registered watcher " + watcher);
       ocmh.zkStateReader.registerCollectionStateWatcher(coll.getName(), watcher);
     }
     if (addResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" +
-          " on node=%s", coll.getName(), slice.getName(), targetNode);
+          " on node=%s, failure=", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
       log.warn(errorString);
       results.add("failure", errorString);
       if (watcher != null) { // unregister
@@ -239,10 +286,10 @@ public class MoveReplicaCmd implements Cmd{
     NamedList deleteResult = new NamedList();
     ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
     if (deleteResult.get("failure") != null) {
-      String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
-          coll.getName(), slice.getName(), replica.getName());
+      String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s, failure=%s",
+          coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
       log.warn(errorString);
-      results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
+      results.add("failure", errorString);
     } else {
       String successString = String.format(Locale.ROOT, "MOVEREPLICA action completed successfully, moved replica=%s at node=%s " +
           "to replica=%s at node=%s", replica.getCoreName(), replica.getNodeName(), newCoreName, targetNode);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index 8db306f..26f1709 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -698,7 +698,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
       if (result.size() == coreNames.size()) {
         return result;
       } else {
-        log.debug("Expecting {} cores but found {}", coreNames.size(), result.size());
+        log.debug("Expecting {} cores but found {}", coreNames, result);
       }
       if (timeout.hasTimedOut()) {
         throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas: " + coreNames + " in cluster state. Last state: " + coll);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index cc4a590..365da65 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1417,6 +1417,7 @@ public class ZkController {
         }
         if (core != null && core.getDirectoryFactory().isSharedStorage()) {
           if (core.getDirectoryFactory().isSharedStorage()) {
+            props.put(ZkStateReader.SHARED_STORAGE_PROP, "true");
             props.put("dataDir", core.getDataDir());
             UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
             if (ulog != null) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
index a57faa4..8278977 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
@@ -118,7 +118,7 @@ public class ScheduledTriggers implements Closeable {
 
   private final AtomicReference<ActionThrottle> actionThrottle;
 
-  private final SolrCloudManager dataProvider;
+  private final SolrCloudManager cloudManager;
 
   private final DistribStateManager stateManager;
 
@@ -130,15 +130,15 @@ public class ScheduledTriggers implements Closeable {
 
   private AutoScalingConfig autoScalingConfig;
 
-  public ScheduledTriggers(SolrResourceLoader loader, SolrCloudManager dataProvider) {
+  public ScheduledTriggers(SolrResourceLoader loader, SolrCloudManager cloudManager) {
     scheduledThreadPoolExecutor = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(DEFAULT_TRIGGER_CORE_POOL_SIZE,
         new DefaultSolrThreadFactory("ScheduledTrigger"));
     scheduledThreadPoolExecutor.setRemoveOnCancelPolicy(true);
     scheduledThreadPoolExecutor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);
     actionExecutor = ExecutorUtil.newMDCAwareSingleThreadExecutor(new DefaultSolrThreadFactory("AutoscalingActionExecutor"));
     actionThrottle = new AtomicReference<>(new ActionThrottle("action", TimeUnit.SECONDS.toMillis(DEFAULT_ACTION_THROTTLE_PERIOD_SECONDS)));
-    this.dataProvider = dataProvider;
-    this.stateManager = dataProvider.getDistribStateManager();
+    this.cloudManager = cloudManager;
+    this.stateManager = cloudManager.getDistribStateManager();
     this.loader = loader;
     queueStats = new Stats();
     listeners = new TriggerListeners();
@@ -198,6 +198,11 @@ public class ScheduledTriggers implements Closeable {
       }
     }
     this.autoScalingConfig = autoScalingConfig;
+
+    // reset cooldown and actionThrottle
+    cooldownStart.set(System.nanoTime() - cooldownPeriod.get());
+    actionThrottle.get().reset();
+
     listeners.setAutoScalingConfig(autoScalingConfig);
   }
 
@@ -215,12 +220,12 @@ public class ScheduledTriggers implements Closeable {
     }
     ScheduledTrigger st;
     try {
-      st = new ScheduledTrigger(newTrigger, dataProvider, queueStats);
+      st = new ScheduledTrigger(newTrigger, cloudManager, queueStats);
     } catch (Exception e) {
       if (isClosed) {
         throw new AlreadyClosedException("ScheduledTriggers has been closed and cannot be used anymore");
       }
-      if (dataProvider.isClosed()) {
+      if (cloudManager.isClosed()) {
         log.error("Failed to add trigger " + newTrigger.getName() + " - closing or disconnected from data provider", e);
       } else {
         log.error("Failed to add trigger " + newTrigger.getName(), e);
@@ -241,7 +246,7 @@ public class ScheduledTriggers implements Closeable {
       scheduledTriggers.replace(newTrigger.getName(), scheduledTrigger);
     }
     newTrigger.setProcessor(event -> {
-      if (dataProvider.isClosed()) {
+      if (cloudManager.isClosed()) {
         String msg = String.format(Locale.ROOT, "Ignoring autoscaling event %s because Solr has been shutdown.", event.toString());
         log.warn(msg);
         listeners.fireListeners(event.getSource(), event, TriggerEventProcessorStage.ABORTED, msg);
@@ -296,7 +301,7 @@ public class ScheduledTriggers implements Closeable {
               // this event so that we continue processing other events and not block this action executor
               waitForPendingTasks(newTrigger, actions);
 
-              ActionContext actionContext = new ActionContext(dataProvider, newTrigger, new HashMap<>());
+              ActionContext actionContext = new ActionContext(cloudManager, newTrigger, new HashMap<>());
               for (TriggerAction action : actions) {
                 List<String> beforeActions = (List<String>) actionContext.getProperties().computeIfAbsent(TriggerEventProcessorStage.BEFORE_ACTION.toString(), k -> new ArrayList<String>());
                 beforeActions.add(action.getName());
@@ -346,7 +351,7 @@ public class ScheduledTriggers implements Closeable {
   }
 
   private void waitForPendingTasks(AutoScaling.Trigger newTrigger, List<TriggerAction> actions) throws AlreadyClosedException {
-    DistribStateManager stateManager = dataProvider.getDistribStateManager();
+    DistribStateManager stateManager = cloudManager.getDistribStateManager();
     try {
 
       for (TriggerAction action : actions) {
@@ -365,7 +370,7 @@ public class ScheduledTriggers implements Closeable {
                 String requestid = (String) map.get("requestid");
                 try {
                   log.debug("Found pending task with requestid={}", requestid);
-                  RequestStatusResponse statusResponse = waitForTaskToFinish(dataProvider, requestid,
+                  RequestStatusResponse statusResponse = waitForTaskToFinish(cloudManager, requestid,
                       ExecutePlanAction.DEFAULT_TASK_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                   if (statusResponse != null) {
                     RequestStatusState state = statusResponse.getRequestStatus();
@@ -374,7 +379,7 @@ public class ScheduledTriggers implements Closeable {
                     }
                   }
                 } catch (Exception e) {
-                  if (dataProvider.isClosed())  {
+                  if (cloudManager.isClosed())  {
                     throw e; // propagate the abort to the caller
                   }
                   Throwable rootCause = ExceptionUtils.getRootCause(e);
@@ -395,7 +400,7 @@ public class ScheduledTriggers implements Closeable {
       Thread.currentThread().interrupt();
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Thread interrupted", e);
     } catch (Exception e) {
-      if (dataProvider.isClosed())  {
+      if (cloudManager.isClosed())  {
         throw new AlreadyClosedException("The Solr instance has been shutdown");
       }
       // we catch but don't rethrow because a failure to wait for pending tasks
@@ -618,7 +623,7 @@ public class ScheduledTriggers implements Closeable {
             }
             if (listener != null) {
               try {
-                listener.init(dataProvider, config);
+                listener.init(cloudManager, config);
                 listenersPerName.put(config.name, listener);
               } catch (Exception e) {
                 log.warn("Error initializing TriggerListener " + config, e);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
index 260a991..f248152 100644
--- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
@@ -18,6 +18,7 @@ package org.apache.solr.core;
 
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.net.URLEncoder;
@@ -551,18 +552,21 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol
           return accept;
         }
       });
+    } catch (FileNotFoundException fnfe) {
+      // already deleted - ignore
+      LOG.debug("Old index directory already deleted - skipping...", fnfe);
     } catch (IOException ioExc) {
       LOG.error("Error checking for old index directories to clean-up.", ioExc);
     }
-    
+
+    if (oldIndexDirs == null || oldIndexDirs.length == 0)
+      return; // nothing to clean-up
+
     List<Path> oldIndexPaths = new ArrayList<>(oldIndexDirs.length);
     for (FileStatus ofs : oldIndexDirs) {
       oldIndexPaths.add(ofs.getPath());
     }
 
-    if (oldIndexDirs == null || oldIndexDirs.length == 0)
-      return; // nothing to clean-up
-
     Collections.sort(oldIndexPaths, Collections.reverseOrder());
     
     Set<String> livePaths = getLivePaths();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index 2204c05..de066d5 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -131,6 +131,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
 import static org.apache.solr.common.params.CollectionAdminParams.COUNT_PROP;
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
 import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
 import static org.apache.solr.common.params.CommonParams.NAME;
 import static org.apache.solr.common.params.CommonParams.VALUE_LONG;
@@ -921,6 +922,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
           CollectionParams.SOURCE_NODE,
           CollectionParams.TARGET_NODE,
           WAIT_FOR_FINAL_STATE,
+          IN_PLACE_MOVE,
           "replica",
           "shard");
     }),

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java
index a1c2175..daa267d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionTooManyReplicasTest.java
@@ -195,8 +195,8 @@ public class CollectionTooManyReplicasTest extends SolrCloudTestCase {
             .setNode(deadNode)
             .process(cluster.getSolrClient());
       });
-      assertTrue("Should have gotten a message about shard not ",
-          e1.getMessage().contains("At least one of the node(s) specified are not currently active, no action taken."));
+      assertTrue("Should have gotten a message about shard not currently active: " + e1.toString(),
+          e1.toString().contains("At least one of the node(s) specified [" + deadNode + "] are not currently active in"));
 
       // Should also die if we just add a shard
       Exception e2 = expectThrows(Exception.class, () -> {
@@ -205,8 +205,8 @@ public class CollectionTooManyReplicasTest extends SolrCloudTestCase {
             .process(cluster.getSolrClient());
       });
 
-      assertTrue("Should have gotten a message about shard not ",
-          e2.getMessage().contains("At least one of the node(s) specified are not currently active, no action taken."));
+      assertTrue("Should have gotten a message about shard not currently active: " + e2.toString(),
+          e2.toString().contains("At least one of the node(s) specified [" + deadNode + "] are not currently active in"));
     }
     finally {
       cluster.startJettySolrRunner(jetty);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
index fb60d39..f5e9e7e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
@@ -25,6 +25,7 @@ import org.apache.solr.util.BadHdfsThreadsFilter;
 import org.apache.solr.util.LogLevel;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Test;
 
 /**
  *
@@ -56,6 +57,18 @@ public class MoveReplicaHDFSTest extends MoveReplicaTest {
     dfsCluster = null;
   }
 
+  @Test
+  public void testNormalMove() throws Exception {
+    inPlaceMove = false;
+    test();
+  }
+
+  @Test
+  public void testNormalFailedMove() throws Exception {
+    inPlaceMove = false;
+    testFailedMove();
+  }
+
   public static class ForkJoinThreadsFilter implements ThreadFilter {
 
     @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 3e116fc..631d949 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -48,6 +48,7 @@ import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.util.IdUtils;
 import org.apache.solr.util.LogLevel;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -62,6 +63,9 @@ public class MoveReplicaTest extends SolrCloudTestCase {
   private static ZkStateReaderAccessor accessor;
   private static int overseerLeaderIndex;
 
+  // used by MoveReplicaHDFSTest
+  protected boolean inPlaceMove = true;
+
   @BeforeClass
   public static void setupCluster() throws Exception {
     configureCluster(4)
@@ -91,12 +95,17 @@ public class MoveReplicaTest extends SolrCloudTestCase {
   @Before
   public void beforeTest() throws Exception {
     cluster.deleteAllCollections();
+    // restart any shut down nodes
+    for (int i = cluster.getJettySolrRunners().size(); i < 5; i++) {
+      cluster.startJettySolrRunner();
+    }
+    cluster.waitForAllNodes(5000);
+    inPlaceMove = true;
   }
 
   @Test
   public void test() throws Exception {
-    cluster.waitForAllNodes(5000);
-    String coll = "movereplicatest_coll";
+    String coll = getTestClass().getSimpleName() + "_coll_" + inPlaceMove;
     log.info("total_jettys: " + cluster.getJettySolrRunners().size());
     int REPLICATION = 2;
 
@@ -104,6 +113,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll, "conf1", 2, REPLICATION);
     create.setMaxShardsPerNode(2);
+    create.setAutoAddReplicas(false);
     cloudClient.request(create);
 
     addDocs(coll, 100);
@@ -133,8 +143,10 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     int targetNumCores = getNumOfCores(cloudClient, targetNode, coll);
 
     CollectionAdminRequest.MoveReplica moveReplica = createMoveReplicaRequest(coll, replica, targetNode);
-    moveReplica.processAsync("000", cloudClient);
-    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
+    moveReplica.setInPlaceMove(inPlaceMove);
+    String asyncId = IdUtils.randomId();
+    moveReplica.processAsync(asyncId, cloudClient);
+    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);
     // wait for async request success
     boolean success = false;
     for (int i = 0; i < 200; i++) {
@@ -193,6 +205,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     assertEquals(watchers, newWatchers);
 
     moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
+    moveReplica.setInPlaceMove(inPlaceMove);
     moveReplica.process(cloudClient);
     checkNumOfCores(cloudClient, replica.getNodeName(), coll, sourceNumCores);
     // wait for recovery
@@ -232,12 +245,14 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     assertTrue("replica never fully recovered", recovered);
     newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
     assertEquals(watchers, newWatchers);
+
+    assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
   }
 
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11458")
+ // @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11458")
   @Test
   public void testFailedMove() throws Exception {
-    String coll = "movereplicatest_failed_coll";
+    String coll = getTestClass().getSimpleName() + "_failed_coll_" + inPlaceMove;
     int REPLICATION = 2;
 
     CloudSolrClient cloudClient = cluster.getSolrClient();
@@ -245,6 +260,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     Set<CollectionStateWatcher> watchers = new HashSet<>(accessor.getStateWatchers(coll));
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll, "conf1", 2, REPLICATION);
+    create.setAutoAddReplicas(false);
     cloudClient.request(create);
 
     addDocs(coll, 100);
@@ -262,15 +278,17 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     }
     assertNotNull(targetNode);
     CollectionAdminRequest.MoveReplica moveReplica = createMoveReplicaRequest(coll, replica, targetNode);
+    moveReplica.setInPlaceMove(inPlaceMove);
     // start moving
-    moveReplica.processAsync("001", cloudClient);
+    String asyncId = IdUtils.randomId();
+    moveReplica.processAsync(asyncId, cloudClient);
     // shut down target node
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       if (cluster.getJettySolrRunner(i).getNodeName().equals(targetNode)) {
         cluster.stopJettySolrRunner(i);
       }
     }
-    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("001");
+    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);
     // wait for async request success
     boolean success = true;
     for (int i = 0; i < 200; i++) {
@@ -286,6 +304,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
 
     Set<CollectionStateWatcher> newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
     assertEquals(watchers, newWatchers);
+
     log.info("--- current collection state: " + cloudClient.getZkStateReader().getClusterState().getCollection(coll));
     assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
   }
@@ -375,4 +394,5 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     }
     solrClient.commit(collection);
     Thread.sleep(5000);
-  }}
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
index fd74c9e..cacf39c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
@@ -119,6 +119,10 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
 
   @Before
   public void setupTest() throws Exception {
+    // clear any persisted auto scaling configuration
+    Stat stat = zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true);
+    log.info(SOLR_AUTOSCALING_CONF_PATH + " reset, new znode version {}", stat.getVersion());
+
     throttlingDelayMs.set(TimeUnit.SECONDS.toMillis(ScheduledTriggers.DEFAULT_ACTION_THROTTLE_PERIOD_SECONDS));
     waitForSeconds = 1 + random().nextInt(3);
     actionConstructorCalled = new CountDownLatch(1);
@@ -130,9 +134,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     actionCompleted = new CountDownLatch(1);
     events.clear();
     listenerEvents.clear();
-    // clear any persisted auto scaling configuration
-    Stat stat = zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true);
-    log.info(SOLR_AUTOSCALING_CONF_PATH + " reset, new znode version {}", stat.getVersion());
+    lastActionExecutedAt.set(0);
     // clear any events or markers
     // todo: consider the impact of such cleanup on regular cluster restarts
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_EVENTS_PATH);
@@ -201,7 +203,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
 
     JettySolrRunner newNode = cluster.startJettySolrRunner();
 
-    if (!triggerFiredLatch.await(20, TimeUnit.SECONDS)) {
+    if (!triggerFiredLatch.await(30, TimeUnit.SECONDS)) {
       fail("Both triggers should have fired by now");
     }
 
@@ -249,7 +251,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
       }
     }
 
-    if (!triggerFiredLatch.await(20, TimeUnit.SECONDS)) {
+    if (!triggerFiredLatch.await(30, TimeUnit.SECONDS)) {
       fail("Both triggers should have fired by now");
     }
   }
@@ -272,16 +274,20 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
         return;
       }
       try {
+        long currentTime = timeSource.getTime();
         if (lastActionExecutedAt.get() != 0)  {
-          log.info("last action at " + lastActionExecutedAt.get() + " time = " + timeSource.getTime() + " expected diff: " + TimeUnit.MILLISECONDS.toNanos(throttlingDelayMs.get() - DELTA_MS));
-          if (timeSource.getTime() - lastActionExecutedAt.get() < TimeUnit.MILLISECONDS.toNanos(throttlingDelayMs.get() - DELTA_MS)) {
+          long minDiff = TimeUnit.MILLISECONDS.toNanos(throttlingDelayMs.get() - DELTA_MS);
+          log.info("last action at " + lastActionExecutedAt.get() + " current time = " + currentTime +
+              "\nreal diff: " + (currentTime - lastActionExecutedAt.get()) +
+              "\n min diff: " + minDiff);
+          if (currentTime - lastActionExecutedAt.get() < minDiff) {
             log.info("action executed again before minimum wait time from {}", event.getSource());
             fail("TriggerListener was fired before the throttling period");
           }
         }
         if (onlyOnce.compareAndSet(false, true)) {
           log.info("action executed from {}", event.getSource());
-          lastActionExecutedAt.set(timeSource.getTime());
+          lastActionExecutedAt.set(currentTime);
           getTriggerFiredLatch().countDown();
         } else  {
           log.info("action executed more than once from {}", event.getSource());
@@ -1225,6 +1231,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     assertTrue("timestamp delta is less than default cooldown period", ev.timestamp - prevTimestamp > TimeUnit.SECONDS.toNanos(ScheduledTriggers.DEFAULT_COOLDOWN_PERIOD_SECONDS));
     prevTimestamp = ev.timestamp;
 
+    // this also resets the cooldown period
     long modifiedCooldownPeriodSeconds = 7;
     String setPropertiesCommand = "{\n" +
         "\t\"set-properties\" : {\n" +
@@ -1243,13 +1250,24 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     JettySolrRunner newNode3 = cluster.startJettySolrRunner();
     await = triggerFiredLatch.await(20, TimeUnit.SECONDS);
     assertTrue("The trigger did not fire at all", await);
+    triggerFiredLatch = new CountDownLatch(1);
+    triggerFired.compareAndSet(true, false);
+    // add another node
+    JettySolrRunner newNode4 = cluster.startJettySolrRunner();
+    await = triggerFiredLatch.await(20, TimeUnit.SECONDS);
+    assertTrue("The trigger did not fire at all", await);
     // wait for listener to capture the SUCCEEDED stage
     Thread.sleep(2000);
 
-    // there must be at least one IGNORED event due to cooldown, and one SUCCEEDED event
+    // there must be at least one SUCCEEDED (due to newNode3) then for newNode4 one IGNORED
+    // event due to cooldown, and one SUCCEEDED
     capturedEvents = listenerEvents.get("bar");
-    assertTrue(capturedEvents.toString(), capturedEvents.size() > 1);
-    for (int i = 0; i < capturedEvents.size() - 1; i++) {
+    assertTrue(capturedEvents.toString(), capturedEvents.size() > 2);
+    // first event should be SUCCEEDED
+    ev = capturedEvents.get(0);
+    assertEquals(ev.toString(), TriggerEventProcessorStage.SUCCEEDED, ev.stage);
+
+    for (int i = 1; i < capturedEvents.size() - 1; i++) {
       ev = capturedEvents.get(i);
       assertEquals(ev.toString(), TriggerEventProcessorStage.IGNORED, ev.stage);
       assertTrue(ev.toString(), ev.message.contains("cooldown"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
index 08811bd..edc5a8b 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
@@ -617,6 +617,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
     protected String collection, replica, targetNode;
     protected String shard, sourceNode;
     protected boolean randomlyMoveReplica;
+    protected boolean inPlaceMove = true;
+    protected int timeout = -1;
 
     public MoveReplica(String collection, String replica, String targetNode) {
       super(CollectionAction.MOVEREPLICA);
@@ -635,11 +637,23 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
       this.randomlyMoveReplica = true;
     }
 
+    public void setInPlaceMove(boolean inPlaceMove) {
+      this.inPlaceMove = inPlaceMove;
+    }
+
+    public void setTimeout(int timeout) {
+      this.timeout = timeout;
+    }
+
     @Override
     public SolrParams getParams() {
       ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
       params.set("collection", collection);
       params.set(CollectionParams.TARGET_NODE, targetNode);
+      params.set(CommonAdminParams.IN_PLACE_MOVE, inPlaceMove);
+      if (timeout != -1) {
+        params.set(CommonAdminParams.TIMEOUT, timeout);
+      }
       if (randomlyMoveReplica) {
         params.set("shard", shard);
         params.set(CollectionParams.SOURCE_NODE, sourceNode);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index e778151..4c2be1a 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -89,6 +89,7 @@ public class ZkStateReader implements Closeable {
   public static final String SHARD_PARENT_PROP = "shard_parent";
   public static final String NUM_SHARDS_PROP = "numShards";
   public static final String LEADER_PROP = "leader";
+  public static final String SHARED_STORAGE_PROP = "shared_storage";
   public static final String PROPERTY_PROP = "property";
   public static final String PROPERTY_PROP_PREFIX = "property.";
   public static final String PROPERTY_VALUE_PROP = "property.value";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
index f20afa7..c39b4a8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
@@ -19,7 +19,12 @@ package org.apache.solr.common.params;
 public interface CommonAdminParams
 {
 
-  /** async or not? **/
+  /** Async or not? **/
   String ASYNC = "async";
+  /** Wait for final state of the operation. */
   String WAIT_FOR_FINAL_STATE = "waitForFinalState";
+  /** Allow in-place move of replicas that use shared filesystems. */
+  String IN_PLACE_MOVE = "inPlaceMove";
+  /** Timeout for replicas to become active. */
+  String TIMEOUT = "timeout";
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/41644bdc/solr/solrj/src/resources/apispec/collections.collection.Commands.json
----------------------------------------------------------------------
diff --git a/solr/solrj/src/resources/apispec/collections.collection.Commands.json b/solr/solrj/src/resources/apispec/collections.collection.Commands.json
index dfae2f2..0cd3644 100644
--- a/solr/solrj/src/resources/apispec/collections.collection.Commands.json
+++ b/solr/solrj/src/resources/apispec/collections.collection.Commands.json
@@ -20,7 +20,7 @@
     "move-replica": {
       "type": "object",
       "documentation": "https://lucene.apache.org/solr/guide/collections-api.html#movereplica",
-      "description": "This command moves a replica from one node to a new node. In case of shared filesystems the `dataDir` will be reused.",
+      "description": "This command moves a replica from one node to a new node. In case of shared filesystems the `dataDir` and `ulogDir` may be reused.",
       "properties": {
         "replica": {
           "type": "string",
@@ -32,13 +32,29 @@
         },
         "sourceNode": {
           "type": "string",
-          "description": "The name of the node that contains the replica"
+          "description": "The name of the node that contains the replica."
         },
         "targetNode": {
           "type": "string",
-          "description": "The name of the destination node. This parameter is required"
+          "description": "The name of the destination node. This parameter is required."
+        },
+        "waitForFinalState": {
+          "type": "boolean",
+          "default": "false",
+          "description": "Wait for the moved replica to become active."
+        },
+        "timeout": {
+          "type": "integer",
+          "default": 600,
+          "description": "Timeout to wait for replica to become active. For very large replicas this may need to be increased."
+        },
+        "inPlaceMove": {
+          "type": "boolean",
+          "default": "true",
+          "description": "For replicas that use shared filesystems allow 'in-place' move that reuses shared data."
         }
-      }
+      },
+      "required":["targetNode"]
     },
     "migrate-docs":{
       "type":"object",