Posted to commits@lucene.apache.org by ab...@apache.org on 2019/04/29 22:19:35 UTC

[lucene-solr] branch master updated: SOLR-13427: Support simulating the execution of autoscaling suggestions.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 6eccf2b  SOLR-13427: Support simulating the execution of autoscaling suggestions.
6eccf2b is described below

commit 6eccf2bf53899e0b1f47c8bb67cc5bca82966cb4
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Mon Apr 29 23:14:38 2019 +0200

    SOLR-13427: Support simulating the execution of autoscaling suggestions.
---
 solr/CHANGES.txt                                   |   2 +
 solr/bin/solr                                      |   2 +-
 .../src/java/org/apache/solr/cloud/CloudUtil.java  | 140 +++++++
 .../solr/cloud/autoscaling/IndexSizeTrigger.java   |   4 +-
 .../solr/cloud/autoscaling/sim/ActionError.java    |   0
 .../autoscaling/sim/GenericDistributedQueue.java   |   0
 .../sim/GenericDistributedQueueFactory.java        |   0
 .../solr/cloud/autoscaling/sim/LiveNodesSet.java   |   0
 .../cloud/autoscaling/sim/SimCloudManager.java     | 120 ++++--
 .../autoscaling/sim/SimClusterStateProvider.java   |  72 +++-
 .../autoscaling/sim/SimDistribStateManager.java    |  30 +-
 .../sim/SimDistributedQueueFactory.java            |   0
 .../autoscaling/sim/SimNodeStateProvider.java      |   0
 .../solr/handler/admin/MetricsHistoryHandler.java  |   3 +-
 .../apache/solr/util/MockSearchableSolrClient.java |   4 +-
 .../src/java/org/apache/solr/util/SolrCLI.java     | 403 +++++++++++++--------
 .../test/org/apache/solr/cloud/CloudTestUtils.java | 145 --------
 .../solr/cloud/MetricsHistoryIntegrationTest.java  |   4 +-
 .../OverseerCollectionConfigSetProcessorTest.java  |   5 +-
 .../apache/solr/cloud/ReindexCollectionTest.java   |  16 +-
 .../cloud/autoscaling/IndexSizeTriggerTest.java    |  32 +-
 .../ScheduledMaintenanceTriggerTest.java           |  12 +-
 .../SearchRateTriggerIntegrationTest.java          |  33 +-
 .../cloud/autoscaling/SearchRateTriggerTest.java   |   8 +-
 .../sim/TestSimClusterStateProvider.java           |  40 +-
 .../autoscaling/sim/TestSimComputePlanAction.java  |  12 +-
 .../autoscaling/sim/TestSimExecutePlanAction.java  |  18 +-
 .../autoscaling/sim/TestSimExtremeIndexing.java    |   8 +-
 .../cloud/autoscaling/sim/TestSimLargeCluster.java |  37 +-
 .../cloud/autoscaling/sim/TestSimPolicyCloud.java  |  38 +-
 .../autoscaling/sim/TestSimTriggerIntegration.java |   5 +-
 .../handler/admin/MetricsHistoryHandlerTest.java   |   6 +-
 .../solrcloud-autoscaling-policy-preferences.adoc  |  27 ++
 .../client/solrj/cloud/DistribStateManager.java    |   2 +-
 .../solrj/cloud/autoscaling/VersionedData.java     |  11 +-
 .../client/solrj/impl/ZkDistribStateManager.java   |   4 +-
 36 files changed, 773 insertions(+), 470 deletions(-)
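
In short: this change promotes the autoscaling simulation framework (SimCloudManager,
SimClusterStateProvider, SimDistribStateManager, etc.) from test code into solr-core,
and moves the waitForState/clusterShape test helpers from CloudTestUtils into CloudUtil,
so that the "autoscaling" tool in SolrCLI can freeze a live cluster's state into a
simulator and simulate executing the computed suggestions (new -simulate and
-i/--iterations options, see the SolrCLI diff below). A minimal sketch of that loop,
using the APIs added or changed by this commit; the realCloudManager variable and the
fixed iteration count are illustrative:

    import java.util.List;
    import org.apache.solr.client.solrj.SolrRequest;
    import org.apache.solr.client.solrj.cloud.SolrCloudManager;
    import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
    import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
    import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
    import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
    import org.apache.solr.common.util.TimeSource;

    public class SimulateSuggestions {
      public static void simulate(SolrCloudManager realCloudManager, int iterations) throws Exception {
        // freeze the live cluster state into a simulated cluster (50x accelerated time)
        SimCloudManager sim = SimCloudManager.createCluster(realCloudManager, TimeSource.get("simTime:50"));
        AutoScalingConfig config = sim.getDistribStateManager().getAutoScalingConfig();
        for (int i = 0; i < iterations; i++) {
          List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(config, sim);
          if (suggestions.isEmpty()) {
            break; // the policy is satisfied, nothing left to simulate
          }
          for (Suggester.SuggestionInfo suggestion : suggestions) {
            SolrRequest operation = suggestion.getOperation();
            if (operation != null) {
              // apply the suggested change to the simulated cluster state only
              sim.request(operation);
            }
          }
        }
      }
    }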

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c1be844..64be490 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -139,6 +139,8 @@ New Features
 
 * SOLR-13391: Add variance and standard deviation stream evaluators (Nazerke Seidan, Joel Bernstein)
 
+* SOLR-13427: Support simulating the execution of autoscaling suggestions. (ab)
+
 Bug Fixes
 ----------------------
 
diff --git a/solr/bin/solr b/solr/bin/solr
index 1d01ded..9b435f3 100755
--- a/solr/bin/solr
+++ b/solr/bin/solr
@@ -706,7 +706,7 @@ function run_tool() {
 
   "$JAVA" $SOLR_SSL_OPTS $AUTHC_OPTS $SOLR_ZK_CREDS_AND_ACLS -Dsolr.install.dir="$SOLR_TIP" \
     -Dlog4j.configurationFile="file:$DEFAULT_SERVER_DIR/resources/log4j2-console.xml" \
-    -classpath "$DEFAULT_SERVER_DIR/solr-webapp/webapp/WEB-INF/lib/*:$DEFAULT_SERVER_DIR/lib/ext/*" \
+    -classpath "$DEFAULT_SERVER_DIR/solr-webapp/webapp/WEB-INF/lib/*:$DEFAULT_SERVER_DIR/lib/ext/*:$DEFAULT_SERVER_DIR/lib/*" \
     org.apache.solr.util.SolrCLI "$@"
 
   return $?
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
index 302703b..6a62be91 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
@@ -19,15 +19,23 @@ package org.apache.solr.cloud;
 import java.io.File;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.CollectionStatePredicate;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
@@ -36,6 +44,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -44,6 +53,7 @@ import org.slf4j.LoggerFactory;
 public class CloudUtil {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
+  public static final int DEFAULT_TIMEOUT = 90;
 
   /**
    * See if coreNodeName has been taken over by another baseUrl and unload core
@@ -142,4 +152,134 @@ public class CloudUtil {
 
   }
 
+  /**
+   * Wait for a particular collection state to appear.
+   *
+   * This is a convenience method that uses the {@link #DEFAULT_TIMEOUT}.
+   *
+   * @param cloudManager current instance of {@link SolrCloudManager}
+   * @param message     a message to report on failure
+   * @param collection  the collection to watch
+   * @param predicate   a predicate to match against the collection state
+   */
+  public static long waitForState(final SolrCloudManager cloudManager,
+                                  final String message,
+                                  final String collection,
+                                  final CollectionStatePredicate predicate) {
+    AtomicReference<DocCollection> state = new AtomicReference<>();
+    AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
+    try {
+      return waitForState(cloudManager, collection, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> {
+        state.set(c);
+        liveNodesLastSeen.set(n);
+        return predicate.matches(n, c);
+      });
+    } catch (Exception e) {
+      throw new AssertionError(message + "\n" + "Live Nodes: " + liveNodesLastSeen.get() + "\nLast available state: " + state.get(), e);
+    }
+  }
+
+  /**
+   * Wait for a particular collection state to appear.
+   *
+   * This is a convenience method that uses the {@link #DEFAULT_TIMEOUT}.
+   *
+   * @param cloudManager current instance of {@link SolrCloudManager}
+   * @param collection  the collection to watch
+   * @param wait timeout value
+   * @param unit timeout unit
+   * @param predicate   a predicate to match against the collection state
+   */
+  public static long waitForState(final SolrCloudManager cloudManager,
+                                  final String collection,
+                                  long wait,
+                                  final TimeUnit unit,
+                                  final CollectionStatePredicate predicate) throws InterruptedException, TimeoutException, IOException {
+    TimeOut timeout = new TimeOut(wait, unit, cloudManager.getTimeSource());
+    long timeWarn = timeout.timeLeft(TimeUnit.MILLISECONDS) / 4;
+    ClusterState state = null;
+    DocCollection coll = null;
+    while (!timeout.hasTimedOut()) {
+      state = cloudManager.getClusterStateProvider().getClusterState();
+      coll = state.getCollectionOrNull(collection);
+      // due to the way we manage collections in SimClusterStateProvider a null here
+      // can mean that a collection is still being created but has no replicas
+      if (coll == null) { // does not yet exist?
+        timeout.sleep(100);
+        continue;
+      }
+      if (predicate.matches(state.getLiveNodes(), coll)) {
+        log.trace("-- predicate matched with state {}", state);
+        return timeout.timeElapsed(TimeUnit.MILLISECONDS);
+      }
+      timeout.sleep(100);
+      if (timeout.timeLeft(TimeUnit.MILLISECONDS) < timeWarn) {
+        log.trace("-- still not matching predicate: {}", state);
+      }
+    }
+    throw new TimeoutException("last ClusterState: " + state + ", last coll state: " + coll);
+  }
+
+  /**
+   * Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
+   * number of active shards and replicas
+   * @param expectedShards expected number of active shards
+   * @param expectedReplicas expected number of active replicas
+   */
+  public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas) {
+    return clusterShape(expectedShards, expectedReplicas, false, false);
+  }
+
+  /**
+   * Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
+   * number of shards and replicas.
+   * <p>Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
+   * {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.</p>
+   * @param expectedShards expected number of shards
+   * @param expectedReplicas expected number of active replicas
+   * @param withInactive if true then also count inactive shards
+   * @param requireLeaders if true then require that each shard has a leader
+   */
+  public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas, boolean withInactive,
+                                                      boolean requireLeaders) {
+    return (liveNodes, collectionState) -> {
+      if (collectionState == null) {
+        log.info("-- null collection");
+        return false;
+      }
+      Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
+      if (slices.size() != expectedShards) {
+        log.info("-- wrong number of slices for collection {}, expected={}, found={}: {}", collectionState.getName(), expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
+        return false;
+      }
+      Set<String> leaderless = new HashSet<>();
+      for (Slice slice : slices) {
+        int activeReplicas = 0;
+        if (requireLeaders && slice.getState() != Slice.State.INACTIVE && slice.getLeader() == null) {
+          leaderless.add(slice.getName());
+          continue;
+        }
+        // skip other checks, we're going to fail anyway
+        if (!leaderless.isEmpty()) {
+          continue;
+        }
+        for (Replica replica : slice) {
+          if (replica.isActive(liveNodes))
+            activeReplicas++;
+        }
+        if (activeReplicas != expectedReplicas) {
+          log.info("-- wrong number of active replicas for collection {} in slice {}, expected={}, found={}", collectionState.getName(), slice.getName(), expectedReplicas, activeReplicas);
+          return false;
+        }
+      }
+      if (leaderless.isEmpty()) {
+        return true;
+      } else {
+        log.info("-- shards without leaders: {}", leaderless);
+        return false;
+      }
+    };
+  }
+
+
 }
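
For illustration, typical use of the new CloudUtil helpers (the cloudManager variable
and collection name are assumed):

    import java.util.concurrent.TimeUnit;
    import org.apache.solr.cloud.CloudUtil;

    // wait up to 120s until "myColl" has 2 shards (counting inactive ones), each
    // active shard has a leader, and each shard has 3 active replicas
    CloudUtil.waitForState(cloudManager, "myColl", 120, TimeUnit.SECONDS,
        CloudUtil.clusterShape(2, 3, true, true));

    // or use the 90s DEFAULT_TIMEOUT variant, which wraps failures in an
    // AssertionError reporting the last observed live nodes and collection state
    CloudUtil.waitForState(cloudManager, "did not reach the expected shape", "myColl",
        CloudUtil.clusterShape(2, 3));
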
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
index 7483501..76edd44 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/IndexSizeTrigger.java
@@ -51,6 +51,8 @@ import org.apache.solr.update.SolrIndexSplitter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.CORE_IDX;
+
 /**
  *
  */
@@ -284,7 +286,7 @@ public class IndexSizeTrigger extends TriggerBase {
               replicaName = info.getName(); // which is actually coreNode name...
             }
             String registry = SolrCoreMetricManager.createRegistryName(true, coll, sh, replicaName, null);
-            String tag = "metrics:" + registry + ":INDEX.sizeInBytes";
+            String tag = "metrics:" + registry + ":" + CORE_IDX.metricsAttribute;
             metricTags.put(tag, info);
             tag = "metrics:" + registry + ":SEARCHER.searcher.numDocs";
             metricTags.put(tag, info);
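
(Variable.Type.CORE_IDX.metricsAttribute is the same "INDEX.sizeInBytes" string as the
literal it replaces, so the tag format is unchanged; the constant simply replaces the
scattered literals here and in the SimClusterStateProvider and MetricsHistoryHandler
hunks below.)
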
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/ActionError.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/ActionError.java
similarity index 100%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/ActionError.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/ActionError.java
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueue.java
similarity index 100%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueue.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueue.java
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueueFactory.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueueFactory.java
similarity index 100%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueueFactory.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/GenericDistributedQueueFactory.java
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/LiveNodesSet.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/LiveNodesSet.java
similarity index 100%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/LiveNodesSet.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/LiveNodesSet.java
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
similarity index 89%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index 7ce4534..eb5027b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -39,7 +40,6 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
-import com.carrotsearch.randomizedtesting.RandomizedContext;
 import com.codahale.metrics.jvm.ClassLoadingGaugeSet;
 import com.codahale.metrics.jvm.GarbageCollectorMetricSet;
 import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
@@ -57,9 +57,11 @@ import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
 import org.apache.solr.client.solrj.impl.ClusterStateProvider;
 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.CollectionApiMapping;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.request.RequestWriter;
 import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.request.V2Request;
 import org.apache.solr.client.solrj.response.RequestStatusState;
 import org.apache.solr.client.solrj.response.SolrResponseBase;
 import org.apache.solr.client.solrj.response.UpdateResponse;
@@ -111,6 +113,17 @@ import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHan
 public class SimCloudManager implements SolrCloudManager {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
+  private static final Random random;
+
+  static {
+    String seed = System.getProperty("tests.seed");
+    if (seed == null) {
+      random = new Random();
+    } else {
+      random = new Random(seed.hashCode());
+    }
+  }
+
   private final SimDistribStateManager stateManager;
   private final SimClusterStateProvider clusterStateProvider;
   private final SimNodeStateProvider nodeStateProvider;
@@ -125,7 +138,7 @@ public class SimCloudManager implements SolrCloudManager {
   private final Map<String, Map<String, AtomicInteger>> eventCounts = new ConcurrentHashMap<>();
   private final MockSearchableSolrClient solrClient;
   private final Map<String, AtomicLong> opCounts = new ConcurrentSkipListMap<>();
-  /** 
+  /**
    * @see #submit
    * @see #getBackgroundTaskFailureCount
    * @see LoggingCallable
@@ -283,6 +296,25 @@ public class SimCloudManager implements SolrCloudManager {
     return cloudManager;
   }
 
+  public static SimCloudManager createCluster(SolrCloudManager other, TimeSource timeSource) throws Exception {
+    SimCloudManager cloudManager = new SimCloudManager(timeSource);
+    cloudManager.getSimClusterStateProvider().copyFrom(other.getClusterStateProvider());
+    List<String> replicaTags = Arrays.asList(
+        Variable.Type.CORE_IDX.metricsAttribute,
+        "QUERY./select.requests",
+        "UPDATE./update.requests"
+    );
+    Set<String> nodeTags = createNodeValues("unused:1234_solr").keySet();
+    for (String node : other.getClusterStateProvider().getLiveNodes()) {
+      SimClusterStateProvider simClusterStateProvider = cloudManager.getSimClusterStateProvider();
+      cloudManager.getSimNodeStateProvider().simSetNodeValues(node, other.getNodeStateProvider().getNodeValues(node, nodeTags));
+      Map<String, Map<String, List<ReplicaInfo>>> infos = other.getNodeStateProvider().getReplicaInfo(node, replicaTags);
+      simClusterStateProvider.simSetReplicaValues(node, infos, true);
+    }
+    cloudManager.getSimDistribStateManager().copyFrom(other.getDistribStateManager(), false);
+    return cloudManager;
+  }
+
   /**
    * Create simulated node values (metrics) for a node.
    * @param nodeName node name (eg. '127.0.0.1:10000_solr'). If null then a new node name will be
@@ -415,7 +447,7 @@ public class SimCloudManager implements SolrCloudManager {
    * Get the source of randomness (usually initialized by the test suite).
    */
   public Random getRandom() {
-    return RandomizedContext.current().getRandom();
+    return random;
   }
 
   /**
@@ -689,6 +721,13 @@ public class SimCloudManager implements SolrCloudManager {
     count.incrementAndGet();
   }
 
+  private static final Map<String, String> v2v1Mapping = new HashMap<>();
+  static {
+    for (CollectionApiMapping.Meta meta : CollectionApiMapping.Meta.values()) {
+      if (meta.action != null) v2v1Mapping.put(meta.commandName, meta.action.toLower());
+    }
+  }
+
   /**
    * Handler method for autoscaling requests. NOTE: only a specific subset of autoscaling requests is
    * supported!
@@ -700,7 +739,7 @@ public class SimCloudManager implements SolrCloudManager {
     timeSource.sleep(5);
 
     log.trace("--- got SolrRequest: " + req.getMethod() + " " + req.getPath() +
-        (req.getParams() != null ? " " + req.getParams().toQueryString() : ""));
+        (req.getParams() != null ? " " + req.getParams() : ""));
     if (req.getPath() != null) {
       if (req.getPath().startsWith("/admin/autoscaling") ||
           req.getPath().startsWith("/cluster/autoscaling") ||
@@ -789,27 +828,56 @@ public class SimCloudManager implements SolrCloudManager {
       }
     }
     // support only a specific subset of collection admin ops
-    if (!(req instanceof CollectionAdminRequest)) {
-      throw new UnsupportedOperationException("Only some CollectionAdminRequest-s are supported: " + req.getClass().getName());
-    }
-    metricManager.registry("solr.node").counter("ADMIN." + req.getPath() + ".requests").inc();
     SolrParams params = req.getParams();
-    String a = params.get(CoreAdminParams.ACTION);
+    String a = params != null ? params.get(CoreAdminParams.ACTION) : null;
     SolrResponse rsp = new SolrResponseBase();
     rsp.setResponse(new NamedList<>());
+    if (!(req instanceof CollectionAdminRequest)) {
+      // maybe a V2Request?
+      if (req instanceof V2Request) {
+        Map<String, Object> reqMap = new HashMap<>();
+        ((V2Request)req).toMap(reqMap);
+        String path = (String)reqMap.get("path");
+        if (!path.startsWith("/c/") || path.length() < 4) {
+          throw new UnsupportedOperationException("Unsupported V2 request path: " + reqMap);
+        }
+        Map<String, Object> cmd = (Map<String, Object>)reqMap.get("command");
+        if (cmd.size() != 1) {
+          throw new UnsupportedOperationException("Unsupported multi-command V2 request: " + reqMap);
+        }
+        a = cmd.keySet().iterator().next();
+        params = new ModifiableSolrParams();
+        ((ModifiableSolrParams)params).add(CollectionAdminParams.COLLECTION, path.substring(3));
+        if (req.getParams() != null) {
+          ((ModifiableSolrParams)params).add(req.getParams());
+        }
+        Map<String, Object> reqParams = (Map<String, Object>)cmd.get(a);
+        for (Map.Entry<String, Object> e : reqParams.entrySet()) {
+          ((ModifiableSolrParams)params).add(e.getKey(), e.getValue().toString());
+        }
+        // re-map from v2 to v1 action
+        a = v2v1Mapping.get(a);
+        if (a == null) {
+          throw new UnsupportedOperationException("Unsupported V2 request: " + reqMap);
+        }
+      } else {
+        throw new UnsupportedOperationException("Only some CollectionAdminRequest-s are supported: " + req.getClass().getName() + ": " + req.getPath() + " " + req.getParams());
+      }
+    }
+    metricManager.registry("solr.node").counter("ADMIN." + req.getPath() + ".requests").inc();
     if (a != null) {
       CollectionParams.CollectionAction action = CollectionParams.CollectionAction.get(a);
       if (action == null) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown action: " + a);
       }
-      log.trace("Invoking Collection Action :{} with params {}", action.toLower(), req.getParams().toQueryString());
+      log.trace("Invoking Collection Action :{} with params {}", action.toLower(), params.toQueryString());
       NamedList results = new NamedList();
       rsp.setResponse(results);
       incrementCount(action.name());
       switch (action) {
         case REQUESTSTATUS:
           // we complete all async ops immediately
-          String requestId = req.getParams().get(REQUESTID);
+          String requestId = params.get(REQUESTID);
           SimpleOrderedMap<String> status = new SimpleOrderedMap<>();
           status.add("state", RequestStatusState.COMPLETED.getKey());
           status.add("msg", "found [" + requestId + "] in completed tasks");
@@ -820,21 +888,21 @@ public class SimCloudManager implements SolrCloudManager {
           rsp.setResponse(results);
           break;
         case DELETESTATUS:
-          requestId = req.getParams().get(REQUESTID);
+          requestId = params.get(REQUESTID);
           results.add("status", "successfully removed stored response for [" + requestId + "]");
           results.add("success", "");
           break;
         case CREATE:
           try {
-            clusterStateProvider.simCreateCollection(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
+            clusterStateProvider.simCreateCollection(new ZkNodeProps(params.toNamedList().asMap(10)), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
           break;
         case DELETE:
           try {
-            clusterStateProvider.simDeleteCollection(req.getParams().get(CommonParams.NAME),
-                req.getParams().get(CommonAdminParams.ASYNC), results);
+            clusterStateProvider.simDeleteCollection(params.get(CommonParams.NAME),
+                params.get(CommonAdminParams.ASYNC), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
@@ -844,21 +912,21 @@ public class SimCloudManager implements SolrCloudManager {
           break;
         case ADDREPLICA:
           try {
-            clusterStateProvider.simAddReplica(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
+            clusterStateProvider.simAddReplica(new ZkNodeProps(params.toNamedList().asMap(10)), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
           break;
         case MOVEREPLICA:
           try {
-            clusterStateProvider.simMoveReplica(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
+            clusterStateProvider.simMoveReplica(new ZkNodeProps(params.toNamedList().asMap(10)), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
           break;
         case OVERSEERSTATUS:
-          if (req.getParams().get(CommonAdminParams.ASYNC) != null) {
-            results.add(REQUESTID, req.getParams().get(CommonAdminParams.ASYNC));
+          if (params.get(CommonAdminParams.ASYNC) != null) {
+            results.add(REQUESTID, params.get(CommonAdminParams.ASYNC));
           }
           if (!liveNodesSet.get().isEmpty()) {
             results.add("leader", liveNodesSet.get().iterator().next());
@@ -869,34 +937,34 @@ public class SimCloudManager implements SolrCloudManager {
           results.add("success", "");
           break;
         case ADDROLE:
-          nodeStateProvider.simSetNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
+          nodeStateProvider.simSetNodeValue(params.get("node"), "nodeRole", params.get("role"));
           break;
         case CREATESHARD:
           try {
-            clusterStateProvider.simCreateShard(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
+            clusterStateProvider.simCreateShard(new ZkNodeProps(params.toNamedList().asMap(10)), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
           break;
         case SPLITSHARD:
           try {
-            clusterStateProvider.simSplitShard(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
+            clusterStateProvider.simSplitShard(new ZkNodeProps(params.toNamedList().asMap(10)), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
           break;
         case DELETESHARD:
           try {
-            clusterStateProvider.simDeleteShard(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
+            clusterStateProvider.simDeleteShard(new ZkNodeProps(params.toNamedList().asMap(10)), results);
           } catch (Exception e) {
             throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
           }
           break;
         default:
-          throw new UnsupportedOperationException("Unsupported collection admin action=" + action + " in request: " + req.getParams());
+          throw new UnsupportedOperationException("Unsupported collection admin action=" + action + " in request: " + params);
       }
     } else {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "action is a required param in request: " + req.getParams());
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "action is a required param in request: " + params);
     }
     return rsp;
 
@@ -970,7 +1038,7 @@ public class SimCloudManager implements SolrCloudManager {
         // be forgiving of errors that occurred as a result of interruption, even if
         // the inner Callable didn't realize it...
         if (Thread.currentThread().isInterrupted()) {
-          log.warn("Callable interupted w/o noticing", t);
+          log.warn("Callable interrupted w/o noticing", t);
           throw t;
         }
         Throwable cause = t;
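
Note that the request handler above now also accepts single-command V2 requests under
/c/<collection>, re-mapping the v2 command name to its V1 CollectionAction via
CollectionApiMapping. A hypothetical example of a request the simulator would accept
(the collection and shard names, and the simCloudManager variable, are made up):

    import java.util.Collections;
    import org.apache.solr.client.solrj.SolrRequest;
    import org.apache.solr.client.solrj.request.V2Request;

    // single-command POST to /c/<collection>; the simulator takes the collection name
    // from the path and re-maps the "add-replica" command to the V1 ADDREPLICA action
    V2Request req = new V2Request.Builder("/c/myColl")
        .withMethod(SolrRequest.METHOD.POST)
        .withPayload(Collections.singletonMap("add-replica",
            Collections.singletonMap("shard", "shard1")))
        .build();
    simCloudManager.request(req);
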
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
similarity index 96%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index ef20c87..0b03ecf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -61,7 +61,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.UpdateResponse;
 import org.apache.solr.cloud.ActionThrottle;
-import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.api.collections.AddReplicaCmd;
 import org.apache.solr.cloud.api.collections.Assign;
@@ -94,7 +94,6 @@ import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrInfoBean;
 import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.zookeeper.CreateMode;
-import org.junit.Assert;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -185,6 +184,13 @@ public class SimClusterStateProvider implements ClusterStateProvider {
 
   // ============== SIMULATOR SETUP METHODS ====================
 
+  public void copyFrom(ClusterStateProvider other) throws Exception {
+    ClusterState state = other.getClusterState();
+    simSetClusterState(state);
+    clusterProperties.clear();
+    clusterProperties.putAll(other.getClusterProperties());
+  }
+
   /**
    * Initialize from an existing cluster state
    * @param initialState initial cluster state
@@ -561,11 +567,13 @@ public class SimClusterStateProvider implements ClusterStateProvider {
         cores = 0;
       }
       cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.CORES, cores + 1);
-      Integer disk = (Integer)values.get(ImplicitSnitch.DISK);
+      Number disk = (Number)values.get(ImplicitSnitch.DISK);
       if (disk == null) {
         disk = SimCloudManager.DEFAULT_FREE_DISK;
       }
-      cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk - 1);
+      long replicaSize = ((Number)replicaInfo.getVariable(Type.CORE_IDX.metricsAttribute)).longValue();
+      Number replicaSizeGB = (Number)Type.CORE_IDX.convertVal(replicaSize);
+      cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk.doubleValue() - replicaSizeGB.doubleValue());
       // fake metrics
       String registry = SolrMetricManager.getRegistryName(SolrInfoBean.Group.core, replicaInfo.getCollection(),
           replicaInfo.getShard(),
@@ -573,8 +581,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
       cloudManager.getMetricManager().registry(registry).counter("UPDATE./update.requests");
       cloudManager.getMetricManager().registry(registry).counter("QUERY./select.requests");
       cloudManager.getMetricManager().registerGauge(null, registry,
-          () -> ((Number)replicaInfo.getVariable(Type.CORE_IDX.metricsAttribute)).longValue(),
-          "", true, "INDEX.sizeInBytes");
+          () -> replicaSize, "", true, Type.CORE_IDX.metricsAttribute);
       // at this point nuke our cached DocCollection state
       collectionsStatesRef.set(null);
       log.trace("-- simAddReplica {}", replicaInfo);
@@ -616,11 +623,16 @@ public class SimClusterStateProvider implements ClusterStateProvider {
                 throw new Exception("Unexpected value of 'cores' (" + cores + ") on node: " + nodeId);
               }
               cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.CORES, cores - 1);
-              Integer disk = (Integer)cloudManager.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.DISK);
-              if (disk == null || disk == 0) {
+              Number disk = (Number)cloudManager.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.DISK);
+              if (disk == null || disk.doubleValue() == 0.0) {
                 throw new Exception("Unexpected value of 'freedisk' (" + disk + ") on node: " + nodeId);
               }
-              cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk + 1);
+              if (ri.getVariable(Type.CORE_IDX.metricsAttribute) == null) {
+                throw new RuntimeException("Missing replica size: " + ri);
+              }
+              long replicaSize = ((Number)ri.getVariable(Type.CORE_IDX.metricsAttribute)).longValue();
+              Number replicaSizeGB = (Number)Type.CORE_IDX.convertVal(replicaSize);
+              cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk.doubleValue() + replicaSizeGB.doubleValue());
             }
             log.trace("-- simRemoveReplica {}", ri);
             simRunLeaderElection(ri.getCollection(), ri.getShard(), true);
@@ -645,7 +657,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     try {
       VersionedData oldData = stateManager.getData(ZkStateReader.CLUSTER_STATE);
       int version = oldData != null ? oldData.getVersion() : 0;
-      Assert.assertEquals(clusterStateVersion, version);
+      assert clusterStateVersion == version : "local clusterStateVersion out of sync";
       stateManager.setData(ZkStateReader.CLUSTER_STATE, data, version);
       log.debug("** saved cluster state version " + (version));
       clusterStateVersion++;
@@ -919,7 +931,9 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     int numNrtReplicas = props.getInt(NRT_REPLICAS, props.getInt(REPLICATION_FACTOR, numTlogReplicas>0?0:1));
     int numPullReplicas = props.getInt(PULL_REPLICAS, 0);
     int totalReplicas = shardNames.size() * (numNrtReplicas + numPullReplicas + numTlogReplicas);
-    Assert.assertEquals("unexpected number of replica positions", totalReplicas, replicaPositions.size());
+    if (totalReplicas != replicaPositions.size()) {
+      throw new RuntimeException("unexpected number of replica positions: expected " + totalReplicas + " but got " + replicaPositions.size());
+    }
     final CountDownLatch finalStateLatch = new CountDownLatch(replicaPositions.size());
     AtomicInteger replicaNum = new AtomicInteger(1);
     replicaPositions.forEach(pos -> {
@@ -1076,7 +1090,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
   }
 
   /**
-   * Move replica. This uses a similar algorithm as {@link org.apache.solr.cloud.api.collections.MoveReplicaCmd#moveNormalReplica(ClusterState, NamedList, String, String, DocCollection, Replica, Slice, int, boolean)}.
+   * Move replica. This uses a similar algorithm as {@link org.apache.solr.cloud.api.collections.MoveReplicaCmd} <code>moveNormalReplica(...)</code> method.
    * @param message operation details
    * @param results operation results.
    */
@@ -1305,7 +1319,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
 
     boolean success = false;
     try {
-      CloudTestUtils.waitForState(cloudManager, collectionName, 30, TimeUnit.SECONDS, (liveNodes, state) -> {
+      CloudUtil.waitForState(cloudManager, collectionName, 30, TimeUnit.SECONDS, (liveNodes, state) -> {
         for (String subSlice : subSlices) {
           Slice s = state.getSlice(subSlice);
           if (s.getLeader() == null) {
@@ -1436,8 +1450,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
           OverseerCollectionMessageHandler.NUM_SLICES, "1",
           CommonAdminParams.WAIT_FOR_FINAL_STATE, "true");
       simCreateCollection(props, new NamedList());
-      CloudTestUtils.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
-          CloudTestUtils.clusterShape(1, Integer.parseInt(repFactor), false, true));
+      CloudUtil.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
+          CloudUtil.clusterShape(1, Integer.parseInt(repFactor), false, true));
     } catch (Exception e) {
       throw new IOException(e);
     }
@@ -1980,6 +1994,30 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     }
   }
 
+  public void simSetReplicaValues(String node, Map<String, Map<String, List<ReplicaInfo>>> source, boolean overwrite) {
+    List<ReplicaInfo> infos = nodeReplicaMap.get(node);
+    if (infos == null) {
+      throw new RuntimeException("Node not present: " + node);
+    }
+    Map<String, ReplicaInfo> infoMap = new HashMap<>();
+    infos.forEach(ri -> infoMap.put(ri.getName(), ri));
+    source.forEach((coll, shards) -> shards.forEach((shard, replicas) -> replicas.forEach(r -> {
+      ReplicaInfo target = infoMap.get(r.getName());
+      if (target == null) {
+        throw new RuntimeException("Unable to find simulated replica of " + r);
+      }
+      r.getVariables().forEach((k, v) -> {
+        if (target.getVariables().containsKey(k)) {
+          if (overwrite) {
+            target.getVariables().put(k, v);
+          }
+        } else {
+          target.getVariables().put(k, v);
+        }
+      });
+    })));
+  }
+
   /**
    * Return all replica infos for a node.
    * @param node node id
@@ -2043,7 +2081,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
               ReplicaInfo ri = getReplicaInfo(s.getReplicas().iterator().next());
               if (ri != null) {
                 Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
-                Number numBytes = (Number)ri.getVariable("INDEX.sizeInBytes");
+                Number numBytes = (Number)ri.getVariable(Type.CORE_IDX.metricsAttribute);
                 if (numDocs != null) {
                   inactiveDocs.addValue(numDocs.doubleValue());
                 }
@@ -2081,7 +2119,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
           if (ri != null) {
             Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
             Number delDocs = (Number)ri.getVariable("SEARCHER.searcher.deleteDocs");
-            Number numBytes = (Number)ri.getVariable("INDEX.sizeInBytes");
+            Number numBytes = (Number)ri.getVariable(Type.CORE_IDX.metricsAttribute);
             if (numDocs != null) {
               docs.addValue(numDocs.doubleValue());
             }
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimDistribStateManager.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimDistribStateManager.java
similarity index 94%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimDistribStateManager.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimDistribStateManager.java
index 9b02b57..1c50007 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimDistribStateManager.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimDistribStateManager.java
@@ -136,7 +136,7 @@ public class SimDistribStateManager implements DistribStateManager {
     public VersionedData getData(Watcher w) {
       dataLock.lock();
       try {
-        VersionedData res = new VersionedData(version, data, clientId);
+        VersionedData res = new VersionedData(version, data, mode, clientId);
         if (w != null && !dataWatches.contains(w)) {
           dataWatches.add(w);
         }
@@ -236,6 +236,32 @@ public class SimDistribStateManager implements DistribStateManager {
     juteMaxbuffer = Integer.parseInt(bufferSize);
   }
 
+  /**
+   * Copy all content from another DistribStateManager.
+   * @param other another state manager.
+   * @param failOnExists abort copy when one or more paths already exist (the state of this manager remains unchanged).
+   */
+  public void copyFrom(DistribStateManager other, boolean failOnExists) throws InterruptedException, IOException, KeeperException, AlreadyExistsException, BadVersionException {
+    List<String> tree = other.listTree("/");
+    // check if any node exists
+    for (String path : tree) {
+      if (hasData(path) && failOnExists) {
+        throw new AlreadyExistsException(path);
+      }
+    }
+    for (String path : tree) {
+      VersionedData data = other.getData(path);
+      if (hasData(path)) {
+        setData(path, data.getData(), -1);
+      } else {
+        makePath(path, data.getData(), data.getMode(), failOnExists);
+      }
+      // hack: set the version to be the same as the source
+      Node n = traverse(path, false, CreateMode.PERSISTENT);
+      n.version = data.getVersion();
+    }
+  }
+
   public SimDistribStateManager(ActionThrottle actionThrottle, ActionError actionError) {
     this(null, actionThrottle, actionError);
   }
@@ -520,7 +546,7 @@ public class SimDistribStateManager implements DistribStateManager {
 
   @Override
   public void setData(String path, byte[] data, int version) throws NoSuchElementException, BadVersionException, IOException {
-    if (data.length > juteMaxbuffer) {
+    if (data != null && data.length > juteMaxbuffer) {
       throw new IOException("Len error " + data.length);
     }
     multiLock.lock();
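
The new copyFrom() is what SimCloudManager.createCluster() uses above to clone the real
ZooKeeper tree into the simulator. A minimal sketch, assuming an already-connected
source manager (the zkStateManager variable is illustrative):

    import org.apache.solr.client.solrj.cloud.DistribStateManager;
    import org.apache.solr.cloud.autoscaling.sim.SimDistribStateManager;

    SimDistribStateManager simStateManager = new SimDistribStateManager();
    // copy the whole tree, overwriting any paths that already exist in the simulator;
    // node versions are carried over so optimistic-concurrency checks match the source
    simStateManager.copyFrom(zkStateManager, false);
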
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimDistributedQueueFactory.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimDistributedQueueFactory.java
similarity index 100%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimDistributedQueueFactory.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimDistributedQueueFactory.java
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
similarity index 100%
rename from solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
rename to solr/core/src/java/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
index 51bad59..c69f99c 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
@@ -61,6 +61,7 @@ import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.NodeStateProvider;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
+import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
 import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
 import org.apache.solr.cloud.LeaderElector;
@@ -136,7 +137,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
 
     DEFAULT_NODE_GAUGES.add("CONTAINER.fs.coreRoot.usableSpace");
 
-    DEFAULT_CORE_GAUGES.add("INDEX.sizeInBytes");
+    DEFAULT_CORE_GAUGES.add(Variable.Type.CORE_IDX.metricsAttribute);
 
     DEFAULT_CORE_COUNTERS.add("QUERY./select.requests");
     DEFAULT_CORE_COUNTERS.add("UPDATE./update.requests");
diff --git a/solr/core/src/test/org/apache/solr/util/MockSearchableSolrClient.java b/solr/core/src/java/org/apache/solr/util/MockSearchableSolrClient.java
similarity index 98%
rename from solr/core/src/test/org/apache/solr/util/MockSearchableSolrClient.java
rename to solr/core/src/java/org/apache/solr/util/MockSearchableSolrClient.java
index 6fce498..c4d1d0c 100644
--- a/solr/core/src/test/org/apache/solr/util/MockSearchableSolrClient.java
+++ b/solr/core/src/java/org/apache/solr/util/MockSearchableSolrClient.java
@@ -20,6 +20,7 @@ import java.io.IOException;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.solr.client.solrj.SolrClient;
@@ -32,7 +33,6 @@ import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import org.junit.Assert;
 
 /**
  * Simple mock client that collects added documents and supports simple search by id
@@ -62,7 +62,7 @@ public class MockSearchableSolrClient extends SolrClient {
       if (docList != null) {
         docList.forEach(doc -> {
           String id = (String) doc.getFieldValue("id");
-          Assert.assertNotNull(doc.toString(), id);
+          Objects.requireNonNull(id, doc.toString());
           docs.computeIfAbsent(collection, c -> new LinkedHashMap<>()).put(id, doc);
         });
       }
diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java
index 791a9d5..b383ec8 100755
--- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java
@@ -101,11 +101,13 @@ import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.client.solrj.cloud.DistributedQueueFactory;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
 import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
 import org.apache.solr.client.solrj.cloud.autoscaling.Row;
 import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
+import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
@@ -116,6 +118,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
 import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
@@ -197,6 +200,9 @@ public class SolrCLI implements CLIO {
         String excMsg = exc.getMessage();
         if (excMsg != null) {
           CLIO.err("\nERROR: " + excMsg + "\n");
+          if (verbose) {
+            exc.printStackTrace(CLIO.getErrStream());
+          }
           toolExitStatus = 1;
         } else {
           throw exc;
@@ -856,6 +862,8 @@ public class SolrCLI implements CLIO {
     static final String NODE_REDACTION_PREFIX = "N_";
     static final String COLL_REDACTION_PREFIX = "COLL_";
 
+    private boolean verbose;
+
     public AutoscalingTool() {
       this(CLIO.getOutStream());
     }
@@ -904,6 +912,15 @@ public class SolrCLI implements CLIO {
               .withDescription("Show summarized collection & node statistics.")
               .create("stats"),
           OptionBuilder
+              .withDescription("Simulate execution of all suggestions.")
+              .create("simulate"),
+          OptionBuilder
+              .withDescription("Max number of simulation iterations.")
+              .withArgName("NUMBER")
+              .hasArg()
+              .withLongOpt("iterations")
+              .create("i"),
+          OptionBuilder
               .withDescription("Turn on all options to get all available information.")
               .create("all")
 
@@ -928,34 +945,24 @@ public class SolrCLI implements CLIO {
           throw new UnsupportedOperationException("removeQueue");
         }
       };
-      try (SolrClientCloudManager clientCloudManager = new SolrClientCloudManager(dummmyFactory, cloudSolrClient)) {
+      try (SolrClientCloudManager realCloudManager = new SolrClientCloudManager(dummmyFactory, cloudSolrClient)) {
         AutoScalingConfig config = null;
         HashSet<String> liveNodes = new HashSet<>();
         String configFile = cli.getOptionValue("a");
         if (configFile != null) {
-          log.info("- reading autoscaling config from " + configFile);
+          if (verbose) {
+            log.info("- reading autoscaling config from " + configFile);
+          }
           config = new AutoScalingConfig(IOUtils.toByteArray(new FileInputStream(configFile)));
         } else {
-          log.info("- reading autoscaling config from the cluster.");
-          config = clientCloudManager.getDistribStateManager().getAutoScalingConfig();
+          if (verbose) {
+            log.info("- reading autoscaling config from the cluster.");
+          }
+          config = realCloudManager.getDistribStateManager().getAutoScalingConfig();
         }
-        log.info("- calculating suggestions...");
-        long start = TimeSource.NANO_TIME.getTimeNs();
-        // collect live node names for optional redaction
-        liveNodes.addAll(clientCloudManager.getClusterStateProvider().getLiveNodes());
-        List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(config, clientCloudManager);
-        long end = TimeSource.NANO_TIME.getTimeNs();
-        log.info("  (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
-        log.info("- calculating diagnostics...");
-        start = TimeSource.NANO_TIME.getTimeNs();
-        // update the live nodes
-        liveNodes.addAll(clientCloudManager.getClusterStateProvider().getLiveNodes());
-        Policy.Session session = config.getPolicy().createSession(clientCloudManager);
-        MapWriter mw = PolicyHelper.getDiagnostics(session);
-        Map<String, Object> diagnostics = new LinkedHashMap<>();
-        mw.toMap(diagnostics);
-        end = TimeSource.NANO_TIME.getTimeNs();
-        log.info("  (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
+        // freeze the cluster state
+        SimCloudManager cloudManager = SimCloudManager.createCluster(realCloudManager, TimeSource.get("simTime:50"));
+        liveNodes.addAll(cloudManager.getClusterStateProvider().getLiveNodes());
         boolean withSuggestions = cli.hasOption("s");
         boolean withDiagnostics = cli.hasOption("d") || cli.hasOption("n");
         boolean withSortedNodes = cli.hasOption("n");
@@ -987,149 +994,251 @@ public class SolrCLI implements CLIO {
         }
         // redact collection names too
         Set<String> redactCollections = new HashSet<>();
-        ClusterState clusterState = clientCloudManager.getClusterStateProvider().getClusterState();
+        ClusterState clusterState = cloudManager.getClusterStateProvider().getClusterState();
         clusterState.forEachCollection(coll -> redactCollections.add(coll.getName()));
         if (!withSuggestions && !withDiagnostics) {
           withSuggestions = true;
         }
-        Map<String, Object> results = new LinkedHashMap<>();
-        if (withClusterState) {
-          Map<String, Object> map = new LinkedHashMap<>();
-          map.put("znodeVersion", clusterState.getZNodeVersion());
-          map.put("liveNodes", new TreeSet<>(clusterState.getLiveNodes()));
-          map.put("collections", clusterState.getCollectionsMap());
-          results.put("CLUSTERSTATE", map);
+        Map<String, Object> results = prepareResults(cloudManager, config, withClusterState,
+            withStats, withSuggestions, withSortedNodes, withDiagnostics);
+        if (cli.hasOption("simulate")) {
+          String iterStr = cli.getOptionValue("i", "10");
+          int iterations;
+          try {
+            iterations = Integer.parseInt(iterStr);
+          } catch (Exception e) {
+            log.warn("Invalid option 'i' value, using default 10:" + e);
+            iterations = 10;
+          }
+          Map<String, Object> simulationResults = new HashMap<>();
+          simulate(cloudManager, config, simulationResults, withClusterState,
+              withStats, withSuggestions, withSortedNodes, withDiagnostics, iterations);
+          results.put("simulation", simulationResults);
+        }
+        String data = Utils.toJSONString(results);
+        if (redact) {
+          data = RedactionUtils.redactNames(redactCollections, COLL_REDACTION_PREFIX, data);
+          data = RedactionUtils.redactNames(redactNames, NODE_REDACTION_PREFIX, data);
         }
-        if (withStats) {
-          Map<String, Map<String, Number>> collStats = new TreeMap<>();
-          clusterState.forEachCollection(coll -> {
-            Map<String, Number> perColl = collStats.computeIfAbsent(coll.getName(), n -> new LinkedHashMap<>());
-            AtomicInteger numCores = new AtomicInteger();
-            HashMap<String, Map<String, AtomicInteger>> nodes = new HashMap<>();
-            coll.getSlices().forEach(s -> {
-              numCores.addAndGet(s.getReplicas().size());
-              s.getReplicas().forEach(r -> {
-                nodes.computeIfAbsent(r.getNodeName(), n -> new HashMap<>())
-                    .computeIfAbsent(s.getName(), slice -> new AtomicInteger()).incrementAndGet();
-              });
+        stdout.println(data);
+      }
+    }
+
+    private Map<String, Object> prepareResults(SolrCloudManager clientCloudManager,
+                                               AutoScalingConfig config,
+                                               boolean withClusterState,
+                                               boolean withStats,
+                                               boolean withSuggestions,
+                                               boolean withSortedNodes,
+                                               boolean withDiagnostics) throws Exception {
+      Policy.Session session = config.getPolicy().createSession(clientCloudManager);
+      ClusterState clusterState = clientCloudManager.getClusterStateProvider().getClusterState();
+      if (verbose) {
+        log.info("- calculating suggestions...");
+      }
+      long start = TimeSource.NANO_TIME.getTimeNs();
+      List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(config, clientCloudManager);
+      long end = TimeSource.NANO_TIME.getTimeNs();
+      if (verbose) {
+        log.info("  (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
+        log.info("- calculating diagnostics...");
+      }
+      start = TimeSource.NANO_TIME.getTimeNs();
+      MapWriter mw = PolicyHelper.getDiagnostics(session);
+      Map<String, Object> diagnostics = new LinkedHashMap<>();
+      mw.toMap(diagnostics);
+      end = TimeSource.NANO_TIME.getTimeNs();
+      if (verbose) {
+        log.info("  (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
+      }
+      Map<String, Object> results = new LinkedHashMap<>();
+      if (withClusterState) {
+        Map<String, Object> map = new LinkedHashMap<>();
+        map.put("znodeVersion", clusterState.getZNodeVersion());
+        map.put("liveNodes", new TreeSet<>(clusterState.getLiveNodes()));
+        map.put("collections", clusterState.getCollectionsMap());
+        results.put("CLUSTERSTATE", map);
+      }
+      if (withStats) {
+        Map<String, Map<String, Number>> collStats = new TreeMap<>();
+        clusterState.forEachCollection(coll -> {
+          Map<String, Number> perColl = collStats.computeIfAbsent(coll.getName(), n -> new LinkedHashMap<>());
+          AtomicInteger numCores = new AtomicInteger();
+          HashMap<String, Map<String, AtomicInteger>> nodes = new HashMap<>();
+          coll.getSlices().forEach(s -> {
+            numCores.addAndGet(s.getReplicas().size());
+            s.getReplicas().forEach(r -> {
+              nodes.computeIfAbsent(r.getNodeName(), n -> new HashMap<>())
+                  .computeIfAbsent(s.getName(), slice -> new AtomicInteger()).incrementAndGet();
             });
-            int maxCoresPerNode = 0;
-            int minCoresPerNode = 0;
-            int maxActualShardsPerNode = 0;
-            int minActualShardsPerNode = 0;
-            int maxShardReplicasPerNode = 0;
-            int minShardReplicasPerNode = 0;
-            if (!nodes.isEmpty()) {
-              minCoresPerNode = Integer.MAX_VALUE;
-              minActualShardsPerNode = Integer.MAX_VALUE;
-              minShardReplicasPerNode = Integer.MAX_VALUE;
-              for (Map<String, AtomicInteger> counts : nodes.values()) {
-                int total = counts.values().stream().mapToInt(c -> c.get()).sum();
-                for (AtomicInteger count : counts.values()) {
-                  if (count.get() > maxShardReplicasPerNode) {
-                    maxShardReplicasPerNode = count.get();
-                  }
-                  if (count.get() < minShardReplicasPerNode) {
-                    minShardReplicasPerNode = count.get();
-                  }
-                }
-                if (total > maxCoresPerNode) {
-                  maxCoresPerNode = total;
+          });
+          int maxCoresPerNode = 0;
+          int minCoresPerNode = 0;
+          int maxActualShardsPerNode = 0;
+          int minActualShardsPerNode = 0;
+          int maxShardReplicasPerNode = 0;
+          int minShardReplicasPerNode = 0;
+          if (!nodes.isEmpty()) {
+            minCoresPerNode = Integer.MAX_VALUE;
+            minActualShardsPerNode = Integer.MAX_VALUE;
+            minShardReplicasPerNode = Integer.MAX_VALUE;
+            for (Map<String, AtomicInteger> counts : nodes.values()) {
+              int total = counts.values().stream().mapToInt(c -> c.get()).sum();
+              for (AtomicInteger count : counts.values()) {
+                if (count.get() > maxShardReplicasPerNode) {
+                  maxShardReplicasPerNode = count.get();
                 }
-                if (total < minCoresPerNode) {
-                  minCoresPerNode = total;
+                if (count.get() < minShardReplicasPerNode) {
+                  minShardReplicasPerNode = count.get();
                 }
-                if (counts.size() > maxActualShardsPerNode) {
-                  maxActualShardsPerNode = counts.size();
+              }
+              if (total > maxCoresPerNode) {
+                maxCoresPerNode = total;
+              }
+              if (total < minCoresPerNode) {
+                minCoresPerNode = total;
+              }
+              if (counts.size() > maxActualShardsPerNode) {
+                maxActualShardsPerNode = counts.size();
+              }
+              if (counts.size() < minActualShardsPerNode) {
+                minActualShardsPerNode = counts.size();
+              }
+            }
+          }
+          perColl.put("activeShards", coll.getActiveSlices().size());
+          perColl.put("inactiveShards", coll.getSlices().size() - coll.getActiveSlices().size());
+          perColl.put("rf", coll.getReplicationFactor());
+          perColl.put("maxShardsPerNode", coll.getMaxShardsPerNode());
+          perColl.put("maxActualShardsPerNode", maxActualShardsPerNode);
+          perColl.put("minActualShardsPerNode", minActualShardsPerNode);
+          perColl.put("maxShardReplicasPerNode", maxShardReplicasPerNode);
+          perColl.put("minShardReplicasPerNode", minShardReplicasPerNode);
+          perColl.put("numCores", numCores.get());
+          perColl.put("numNodes", nodes.size());
+          perColl.put("maxCoresPerNode", maxCoresPerNode);
+          perColl.put("minCoresPerNode", minCoresPerNode);
+        });
+        Map<String, Map<String, Object>> nodeStats = new TreeMap<>();
+        Map<Integer, AtomicInteger> coreStats = new TreeMap<>();
+        for (Row row : session.getSortedNodes()) {
+          Map<String, Object> nodeStat = nodeStats.computeIfAbsent(row.node, n -> new LinkedHashMap<>());
+          nodeStat.put("isLive", row.isLive());
+          nodeStat.put("freedisk", row.getVal("freedisk", 0));
+          nodeStat.put("totaldisk", row.getVal("totaldisk", 0));
+          int cores = ((Number)row.getVal("cores", 0)).intValue();
+          nodeStat.put("cores", cores);
+          coreStats.computeIfAbsent(cores, num -> new AtomicInteger()).incrementAndGet();
+          Map<String, Map<String, Map<String, Object>>> collReplicas = new TreeMap<>();
+          row.forEachReplica(ri -> {
+            Map<String, Object> perReplica = collReplicas.computeIfAbsent(ri.getCollection(), c -> new TreeMap<>())
+                .computeIfAbsent(ri.getCore().substring(ri.getCollection().length() + 1), core -> new LinkedHashMap<>());
+//            if (ri.getVariable(Variable.Type.CORE_IDX.tagName) != null) {
+//              perReplica.put(Variable.Type.CORE_IDX.tagName, ri.getVariable(Variable.Type.CORE_IDX.tagName));
+//            }
+            if (ri.getVariable(Variable.Type.CORE_IDX.metricsAttribute) != null) {
+              perReplica.put(Variable.Type.CORE_IDX.metricsAttribute, ri.getVariable(Variable.Type.CORE_IDX.metricsAttribute));
+            }
+            perReplica.put("coreNode", ri.getName());
+            if (ri.getBool("leader", false)) {
+              perReplica.put("leader", true);
+              Double totalSize = (Double)collStats.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
+                  .computeIfAbsent("avgShardSize", size -> 0.0);
+              Number riSize = (Number)ri.getVariable(Variable.Type.CORE_IDX.metricsAttribute);
+              if (riSize != null) {
+                totalSize += riSize.doubleValue();
+                collStats.get(ri.getCollection()).put("avgShardSize", totalSize);
+                Double max = (Double)collStats.get(ri.getCollection()).get("maxShardSize");
+                if (max == null) max = 0.0;
+                if (riSize.doubleValue() > max) {
+                  collStats.get(ri.getCollection()).put("maxShardSize", riSize.doubleValue());
                 }
-                if (counts.size() < minActualShardsPerNode) {
-                  minActualShardsPerNode = counts.size();
+                Double min = (Double)collStats.get(ri.getCollection()).get("minShardSize");
+                if (min == null) min = Double.MAX_VALUE;
+                if (riSize.doubleValue() < min) {
+                  collStats.get(ri.getCollection()).put("minShardSize", riSize.doubleValue());
                 }
               }
             }
-            perColl.put("activeShards", coll.getActiveSlices().size());
-            perColl.put("inactiveShards", coll.getSlices().size() - coll.getActiveSlices().size());
-            perColl.put("rf", coll.getReplicationFactor());
-            perColl.put("maxShardsPerNode", coll.getMaxShardsPerNode());
-            perColl.put("maxActualShardsPerNode", maxActualShardsPerNode);
-            perColl.put("minActualShardsPerNode", minActualShardsPerNode);
-            perColl.put("maxShardReplicasPerNode", maxShardReplicasPerNode);
-            perColl.put("minShardReplicasPerNode", minShardReplicasPerNode);
-            perColl.put("numCores", numCores.get());
-            perColl.put("numNodes", nodes.size());
-            perColl.put("maxCoresPerNode", maxCoresPerNode);
-            perColl.put("minCoresPerNode", minCoresPerNode);
+            nodeStat.put("replicas", collReplicas);
           });
-          Map<String, Map<String, Object>> nodeStats = new TreeMap<>();
-          Map<Integer, AtomicInteger> coreStats = new TreeMap<>();
-          for (Row row : session.getSortedNodes()) {
-            Map<String, Object> nodeStat = nodeStats.computeIfAbsent(row.node, n -> new LinkedHashMap<>());
-            nodeStat.put("isLive", row.isLive());
-            nodeStat.put("freedisk", row.getVal("freedisk", 0));
-            nodeStat.put("totaldisk", row.getVal("totaldisk", 0));
-            int cores = ((Number)row.getVal("cores", 0)).intValue();
-            nodeStat.put("cores", cores);
-            coreStats.computeIfAbsent(cores, num -> new AtomicInteger()).incrementAndGet();
-            Map<String, Map<String, Map<String, Object>>> collReplicas = new TreeMap<>();
-            row.forEachReplica(ri -> {
-              Map<String, Object> perReplica = collReplicas.computeIfAbsent(ri.getCollection(), c -> new TreeMap<>())
-                  .computeIfAbsent(ri.getCore().substring(ri.getCollection().length() + 1), core -> new LinkedHashMap<>());
-              perReplica.put("INDEX.sizeInGB", ri.getVariable("INDEX.sizeInGB"));
-              perReplica.put("coreNode", ri.getName());
-              if (ri.getBool("leader", false)) {
-                perReplica.put("leader", true);
-                Double totalSize = (Double)collStats.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
-                    .computeIfAbsent("avgShardSize", size -> 0.0);
-                Number riSize = (Number)ri.getVariable("INDEX.sizeInGB");
-                if (riSize != null) {
-                  totalSize += riSize.doubleValue();
-                  collStats.get(ri.getCollection()).put("avgShardSize", totalSize);
-                  Double max = (Double)collStats.get(ri.getCollection()).get("maxShardSize");
-                  if (max == null) max = 0.0;
-                  if (riSize.doubleValue() > max) {
-                    collStats.get(ri.getCollection()).put("maxShardSize", riSize.doubleValue());
-                  }
-                  Double min = (Double)collStats.get(ri.getCollection()).get("minShardSize");
-                  if (min == null) min = Double.MAX_VALUE;
-                  if (riSize.doubleValue() < min) {
-                    collStats.get(ri.getCollection()).put("minShardSize", riSize.doubleValue());
-                  }
-                }
-              }
-              nodeStat.put("replicas", collReplicas);
-            });
-          }
+        }
 
-          // calculate average per shard
-          for (Map<String, Number> perColl : collStats.values()) {
-            Double avg = (Double)perColl.get("avgShardSize");
-            if (avg != null) {
-              avg = avg / ((Number)perColl.get("activeShards")).doubleValue();
-              perColl.put("avgShardSize", avg);
-            }
+        // calculate average per shard and convert the units
+        for (Map<String, Number> perColl : collStats.values()) {
+          Number avg = perColl.get("avgShardSize");
+          if (avg != null) {
+            avg = avg.doubleValue() / perColl.get("activeShards").doubleValue();
+            perColl.put("avgShardSize", (Number)Variable.Type.CORE_IDX.convertVal(avg));
+          }
+          Number num = perColl.get("maxShardSize");
+          if (num != null) {
+            perColl.put("maxShardSize", (Number)Variable.Type.CORE_IDX.convertVal(num));
+          }
+          num = perColl.get("minShardSize");
+          if (num != null) {
+            perColl.put("minShardSize", (Number)Variable.Type.CORE_IDX.convertVal(num));
           }
-          Map<String, Object> stats = new LinkedHashMap<>();
-          results.put("STATISTICS", stats);
-          stats.put("coresPerNodes", coreStats);
-          stats.put("nodeStats", nodeStats);
-          stats.put("collectionStats", collStats);
-        }
-        if (withSuggestions) {
-          results.put("SUGGESTIONS", suggestions);
         }
-        if (!withSortedNodes) {
-          diagnostics.remove("sortedNodes");
+        Map<String, Object> stats = new LinkedHashMap<>();
+        results.put("STATISTICS", stats);
+        stats.put("coresPerNodes", coreStats);
+        stats.put("nodeStats", nodeStats);
+        stats.put("collectionStats", collStats);
+      }
+      if (withSuggestions) {
+        results.put("SUGGESTIONS", suggestions);
+      }
+      if (!withSortedNodes) {
+        diagnostics.remove("sortedNodes");
+      }
+      if (withDiagnostics) {
+        results.put("DIAGNOSTICS", diagnostics);
+      }
+      return results;
+    }
+
+    private void simulate(SimCloudManager simCloudManager,
+                          AutoScalingConfig config,
+                          Map<String, Object> results,
+                          boolean withClusterState,
+                          boolean withStats,
+                          boolean withSuggestions,
+                          boolean withSortedNodes,
+                          boolean withDiagnostics, int iterations) throws Exception {
+      int loop = iterations;
+      List<Suggester.SuggestionInfo> suggestions = Collections.emptyList();
+      Map<String, Object> intermediate = new LinkedHashMap<>();
+      results.put("intermediateSuggestions", intermediate);
+      while (loop-- > 0) {
+        suggestions = PolicyHelper.getSuggestions(config, simCloudManager);
+        log.info("-- step " + (iterations - loop) + ", " + suggestions.size() + " suggestions.");
+        if (suggestions.isEmpty()) {
+          break;
         }
-        if (withDiagnostics) {
-          results.put("DIAGNOSTICS", diagnostics);
+        intermediate.put("step" + (iterations - loop), suggestions);
+        int unresolvedCount = 0;
+        for (Suggester.SuggestionInfo suggestion : suggestions) {
+          SolrRequest operation = suggestion.getOperation();
+          if (operation == null) {
+            unresolvedCount++;
+            if (suggestion.getViolation() == null) {
+              log.info("   - ignoring suggestion without violation and without operation: " + suggestion);
+            }
+            continue;
+          }
+          simCloudManager.request(operation);
         }
-        String data = Utils.toJSONString(results);
-        if (redact) {
-          data = RedactionUtils.redactNames(redactCollections, COLL_REDACTION_PREFIX, data);
-          data = RedactionUtils.redactNames(redactNames, NODE_REDACTION_PREFIX, data);
+        if (unresolvedCount == suggestions.size()) {
+          log.info("--- aborting simulation, only unresolved violations remain");
+          break;
         }
-        stdout.println(data);
       }
+      if (loop < 0 && !suggestions.isEmpty()) {
+        CLIO.err("### Failed to apply all suggestions in " + iterations + " steps. Remaining suggestions: " + suggestions + "\n");
+      }
+      results.put("finalState", prepareResults(simCloudManager, config, withClusterState, withStats,
+          withSuggestions, withSortedNodes, withDiagnostics));
     }
   }
 
@@ -3792,7 +3901,11 @@ public class SolrCLI implements CLIO {
         // since this is a CLI, spare the user the stacktrace
         String excMsg = exc.getMessage();
         if (excMsg != null) {
-          CLIO.err("\nERROR: " + excMsg + "\n");
+          if (verbose) {
+            CLIO.err("\nERROR: " + exc + "\n");
+          } else {
+            CLIO.err("\nERROR: " + excMsg + "\n");
+          }
           toolExitStatus = 100; // Exit >= 100 means error, else means number of tests that failed
         } else {
           throw exc;
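
The simulate() method above drives the core of SOLR-13427: it repeatedly asks the
policy engine for suggestions and feeds each resulting operation back into the
simulated cluster, stopping when no suggestions remain or when only unresolved
violations are left. A minimal sketch of the same suggest-then-apply loop driven
directly in code (the "realManager" handle and the iteration budget of 10 are
illustrative assumptions, not part of this commit; reading the config back via
getDistribStateManager() assumes the snapshot carried it over):

    // Snapshot a live cluster into a simulator and iterate suggestions
    // against it; nothing here touches the real cluster.
    SimCloudManager sim = SimCloudManager.createCluster(realManager, TimeSource.get("simTime:50"));
    AutoScalingConfig cfg = sim.getDistribStateManager().getAutoScalingConfig();
    for (int step = 0; step < 10; step++) {
      List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(cfg, sim);
      if (suggestions.isEmpty()) {
        break; // the simulated cluster has converged
      }
      for (Suggester.SuggestionInfo s : suggestions) {
        SolrRequest operation = s.getOperation();
        if (operation == null) {
          continue; // unresolved violation with no actionable operation
        }
        sim.request(operation); // executed inside the simulator only
      }
    }
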
diff --git a/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java b/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
index 07a9219..196cb20 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
@@ -18,17 +18,11 @@
 package org.apache.solr.cloud;
 
 import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collection;
-import java.util.HashSet;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.lucene.util.LuceneTestCase;
-
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrResponse;
@@ -37,17 +31,8 @@ import org.apache.solr.client.solrj.request.RequestWriter;
 import org.apache.solr.client.solrj.request.RequestWriter.StringPayloadContentWriter;
 import org.apache.solr.client.solrj.request.V2Request;
 import org.apache.solr.client.solrj.response.SolrResponseBase;
-
-import org.apache.solr.common.cloud.ClusterState;
-import org.apache.solr.common.cloud.CollectionStatePredicate;
-import org.apache.solr.common.cloud.DocCollection;
-import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.util.TimeOut;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.junit.Assert;
 
 import static org.apache.solr.common.params.CommonParams.JSON_MIME;
@@ -57,140 +42,10 @@ import static org.apache.solr.common.params.CommonParams.JSON_MIME;
  * Some useful methods for SolrCloud tests.
  */
 public class CloudTestUtils {
-  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   public static final int DEFAULT_TIMEOUT = 90;
 
   /**
-   * Wait for a particular collection state to appear.
-   *
-   * This is a convenience method using the {@link #DEFAULT_TIMEOUT}
-   *
-   * @param cloudManager current instance of {@link SolrCloudManager}
-   * @param message     a message to report on failure
-   * @param collection  the collection to watch
-   * @param predicate   a predicate to match against the collection state
-   */
-  public static long waitForState(final SolrCloudManager cloudManager,
-                                  final String message,
-                                  final String collection,
-                                  final CollectionStatePredicate predicate) {
-    AtomicReference<DocCollection> state = new AtomicReference<>();
-    AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
-    try {
-      return waitForState(cloudManager, collection, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> {
-        state.set(c);
-        liveNodesLastSeen.set(n);
-        return predicate.matches(n, c);
-      });
-    } catch (Exception e) {
-      throw new AssertionError(message + "\n" + "Live Nodes: " + liveNodesLastSeen.get() + "\nLast available state: " + state.get(), e);
-    }
-  }
-
-  /**
-   * Wait for a particular collection state to appear.
-   *
-   * This is a convenience method using the {@link #DEFAULT_TIMEOUT}
-   *
-   * @param cloudManager current instance of {@link SolrCloudManager}
-   * @param collection  the collection to watch
-   * @param wait timeout value
-   * @param unit timeout unit
-   * @param predicate   a predicate to match against the collection state
-   */
-  public static long waitForState(final SolrCloudManager cloudManager,
-                                  final String collection,
-                                  long wait,
-                                  final TimeUnit unit,
-                                  final CollectionStatePredicate predicate) throws InterruptedException, TimeoutException, IOException {
-    TimeOut timeout = new TimeOut(wait, unit, cloudManager.getTimeSource());
-    long timeWarn = timeout.timeLeft(TimeUnit.MILLISECONDS) / 4;
-    ClusterState state = null;
-    DocCollection coll = null;
-    while (!timeout.hasTimedOut()) {
-      state = cloudManager.getClusterStateProvider().getClusterState();
-      coll = state.getCollectionOrNull(collection);
-      // due to the way we manage collections in SimClusterStateProvider a null here
-      // can mean that a collection is still being created but has no replicas
-      if (coll == null) { // does not yet exist?
-        timeout.sleep(100);
-        continue;
-      }
-      if (predicate.matches(state.getLiveNodes(), coll)) {
-        log.trace("-- predicate matched with state {}", state);
-        return timeout.timeElapsed(TimeUnit.MILLISECONDS);
-      }
-      timeout.sleep(100);
-      if (timeout.timeLeft(TimeUnit.MILLISECONDS) < timeWarn) {
-        log.trace("-- still not matching predicate: {}", state);
-      }
-    }
-    throw new TimeoutException("last ClusterState: " + state + ", last coll state: " + coll);
-  }
-
-  /**
-   * Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
-   * number of active shards and replicas
-   * @param expectedShards expected number of active shards
-   * @param expectedReplicas expected number of active replicas
-   */
-  public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas) {
-    return clusterShape(expectedShards, expectedReplicas, false, false);
-  }
-
-  /**
-   * Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
-   * number of shards and replicas.
-   * <p>Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
-   * {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.</p>
-   * @param expectedShards expected number of shards
-   * @param expectedReplicas expected number of active replicas
-   * @param withInactive if true then count also inactive shards
-   * @param requireLeaders if true then require that each shard has a leader
-   */
-  public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas, boolean withInactive,
-                                                      boolean requireLeaders) {
-    return (liveNodes, collectionState) -> {
-      if (collectionState == null) {
-        log.info("-- null collection");
-        return false;
-      }
-      Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
-      if (slices.size() != expectedShards) {
-        log.info("-- wrong number of slices for collection {}, expected={}, found={}: {}", collectionState.getName(), expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
-        return false;
-      }
-      Set<String> leaderless = new HashSet<>();
-      for (Slice slice : slices) {
-        int activeReplicas = 0;
-        if (requireLeaders && slice.getState() != Slice.State.INACTIVE && slice.getLeader() == null) {
-          leaderless.add(slice.getName());
-          continue;
-        }
-        // skip other checks, we're going to fail anyway
-        if (!leaderless.isEmpty()) {
-          continue;
-        }
-        for (Replica replica : slice) {
-          if (replica.isActive(liveNodes))
-            activeReplicas++;
-        }
-        if (activeReplicas != expectedReplicas) {
-          log.info("-- wrong number of active replicas for collection {} in slice {}, expected={}, found={}", collectionState.getName(), slice.getName(), expectedReplicas, activeReplicas);
-          return false;
-        }
-      }
-      if (leaderless.isEmpty()) {
-        return true;
-      } else {
-        log.info("-- shards without leaders: {}", leaderless);
-        return false;
-      }
-    };
-  }
-  
-  /**
    * Wait for a particular named trigger to be scheduled.
    * <p>
    * This is a convenience method that polls the autoscaling API looking for a trigger with the 
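
The waitForState() and clusterShape() helpers deleted above were not dropped:
they moved out of the test-only CloudTestUtils into org.apache.solr.cloud.CloudUtil
(imported throughout the test changes below) so that they can be reused outside
the test tree. The remaining hunks in this commit are largely that mechanical
rename; for reference, the relocated call as it appears in the tests:

    // wait for 2 shards with 2 active replicas each; do not count
    // inactive shards, and require that every shard has a leader
    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
        CloudUtil.clusterShape(2, 2, false, true));
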
diff --git a/solr/core/src/test/org/apache/solr/cloud/MetricsHistoryIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/MetricsHistoryIntegrationTest.java
index d30fe29..5332e7a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MetricsHistoryIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MetricsHistoryIntegrationTest.java
@@ -75,8 +75,8 @@ public class MetricsHistoryIntegrationTest extends SolrCloudTestCase {
     // create .system
     CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL, null, 1, 1)
         .process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL,
-        30, TimeUnit.SECONDS, CloudTestUtils.clusterShape(1, 1));
+    CloudUtil.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL,
+        30, TimeUnit.SECONDS, CloudUtil.clusterShape(1, 1));
     solrClient.query(CollectionAdminParams.SYSTEM_COLL, params(CommonParams.Q, "*:*"));
     // sleep a little to allow the handler to collect some metrics
     if (simulated) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
index ef9602f..5ead862 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
@@ -67,6 +67,7 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.update.UpdateShardHandler;
 import org.apache.solr.util.TimeOut;
+import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.data.Stat;
 import org.junit.After;
@@ -398,7 +399,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
       if (data == null || data.length == 0) {
         return null;
       }
-      return new VersionedData(-1, data, "");
+      return new VersionedData(-1, data, CreateMode.PERSISTENT, "");
         
     });
     
@@ -478,7 +479,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
       if (data == null || data.length == 0) {
         return null;
       }
-      return new VersionedData(-1, data, "");
+      return new VersionedData(-1, data, CreateMode.PERSISTENT, "");
         
     });
     
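
VersionedData now also records the ZooKeeper CreateMode of the node it describes,
which is why the mocked reads above gained the extra CreateMode.PERSISTENT
argument. A small sketch of the extended type in use (the getMode() accessor
name is an assumption inferred from the new constructor; -1 marks an unknown
version and "" an empty owner):

    VersionedData vd = new VersionedData(-1, data, CreateMode.PERSISTENT, "");
    if (vd.getMode() == CreateMode.EPHEMERAL) {
      // the node vanishes with its ZooKeeper session; treat as transient state
    }
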
diff --git a/solr/core/src/test/org/apache/solr/cloud/ReindexCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/ReindexCollectionTest.java
index 8413cf2..6c84253 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ReindexCollectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ReindexCollectionTest.java
@@ -133,7 +133,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
     assertEquals(status.toString(), (long)NUM_DOCS, ((Number)status.get("inputDocs")).longValue());
     assertEquals(status.toString(), (long)NUM_DOCS, ((Number)status.get("processedDocs")).longValue());
 
-    CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
+    CloudUtil.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
       ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
       return ReindexCollectionCmd.State.FINISHED == state;
     });
@@ -171,7 +171,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
     }
     assertNotNull("target collection not present after 30s", realTargetCollection);
 
-    CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", realTargetCollection, (liveNodes, coll) -> {
+    CloudUtil.waitForState(cloudManager, "did not finish copying in time", realTargetCollection, (liveNodes, coll) -> {
       ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
       return ReindexCollectionCmd.State.FINISHED == state;
     });
@@ -199,7 +199,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
         .setConfigName("conf3");
     req.process(solrClient);
 
-    CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
+    CloudUtil.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
       ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
       return ReindexCollectionCmd.State.FINISHED == state;
     });
@@ -234,7 +234,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
         .setCollectionParam("q", "id:10*");
     req.process(solrClient);
 
-    CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
+    CloudUtil.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
       ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
       return ReindexCollectionCmd.State.FINISHED == state;
     });
@@ -306,7 +306,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
       assertFalse(coll.getName() + " still exists", coll.getName().startsWith(ReindexCollectionCmd.CHK_COL_PREFIX));
     });
     // verify that the source collection is read-write and has no reindexing flags
-    CloudTestUtils.waitForState(cloudManager, "collection state is incorrect", sourceCollection,
+    CloudUtil.waitForState(cloudManager, "collection state is incorrect", sourceCollection,
         ((liveNodes, collectionState) ->
             !collectionState.isReadOnly() &&
             collectionState.getStr(ReindexCollectionCmd.REINDEXING_STATE) == null &&
@@ -324,7 +324,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
         .setTarget(targetCollection);
     String asyncId = req.processAsync(solrClient);
     // wait for the source collection to be put in readOnly mode
-    CloudTestUtils.waitForState(cloudManager, "source collection didn't become readOnly",
+    CloudUtil.waitForState(cloudManager, "source collection didn't become readOnly",
         sourceCollection, (liveNodes, coll) -> coll.isReadOnly());
 
     req = CollectionAdminRequest.reindexCollection(sourceCollection);
@@ -334,7 +334,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
     assertNotNull(rsp.toString(), status);
     assertEquals(status.toString(), "aborting", status.get("state"));
 
-    CloudTestUtils.waitForState(cloudManager, "incorrect collection state", sourceCollection,
+    CloudUtil.waitForState(cloudManager, "incorrect collection state", sourceCollection,
         ((liveNodes, collectionState) ->
             collectionState.isReadOnly() &&
             getState(sourceCollection) == ReindexCollectionCmd.State.ABORTED));
@@ -347,7 +347,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
     assertEquals(status.toString(), "aborted", status.get("state"));
     // let the process continue
     TestInjection.reindexLatch.countDown();
-    CloudTestUtils.waitForState(cloudManager, "source collection is in wrong state",
+    CloudUtil.waitForState(cloudManager, "source collection is in wrong state",
         sourceCollection, (liveNodes, docCollection) -> !docCollection.isReadOnly() && getState(sourceCollection) == null);
     // verify the response
     rsp = CollectionAdminRequest.requestStatus(asyncId).process(solrClient);
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
index d8975ab..46d990a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
@@ -40,8 +40,8 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
 import org.apache.solr.common.SolrInputDocument;
@@ -152,8 +152,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     if (SPEED == 1) {
       cluster.waitForActiveCollection(collectionName, 2, 4);
     } else {
-      CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-          CloudTestUtils.clusterShape(2, 2, false, true));
+      CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+          CloudUtil.clusterShape(2, 2, false, true));
     }
 
     long waitForSeconds = 3 + random().nextInt(5);
@@ -258,8 +258,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     if (SPEED == 1) {
       cluster.waitForActiveCollection(collectionName, 2, 4);
     } else {
-      CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-          CloudTestUtils.clusterShape(2, 2, false, true));
+      CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+          CloudUtil.clusterShape(2, 2, false, true));
     }
 
     long waitForSeconds = 6 + random().nextInt(5);
@@ -328,7 +328,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
 
     boolean await = finished.await(60000, TimeUnit.MILLISECONDS);
     assertTrue("did not finish processing in time", await);
-    CloudTestUtils.waitForState(cloudManager, collectionName, 20, TimeUnit.SECONDS, CloudTestUtils.clusterShape(6, 2, true, true));
+    CloudUtil.waitForState(cloudManager, collectionName, 20, TimeUnit.SECONDS, CloudUtil.clusterShape(6, 2, true, true));
     assertEquals(1, listenerEvents.size());
     List<CapturedEvent> events = listenerEvents.get("capturing2");
     assertNotNull("'capturing2' events not found", events);
@@ -382,8 +382,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     if (SPEED == 1) {
       cluster.waitForActiveCollection(collectionName, 2, 4);
     } else {
-      CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-          CloudTestUtils.clusterShape(2, 2, false, true));
+      CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+          CloudUtil.clusterShape(2, 2, false, true));
     }
 
     for (int i = 0; i < 20; i++) {
@@ -495,8 +495,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
         "conf", 2, 2).setMaxShardsPerNode(2);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-        CloudTestUtils.clusterShape(2, 2, false, true));
+    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+        CloudUtil.clusterShape(2, 2, false, true));
 
     for (int j = 0; j < 10; j++) {
       UpdateRequest ureq = new UpdateRequest();
@@ -621,8 +621,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     assertEquals(TriggerEventProcessorStage.SUCCEEDED, events.get(5).stage);
 
     // collection should have 2 inactive and 4 active shards
-    CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-        CloudTestUtils.clusterShape(6, 2, true, true));
+    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+        CloudUtil.clusterShape(6, 2, true, true));
 
     // check ops
     List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>) events.get(4).event.getProperty(TriggerEvent.REQUESTED_OPS);
@@ -759,8 +759,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     if (SPEED == 1) {
       cluster.waitForActiveCollection(collectionName, 5, 10);
     } else {
-      CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-          CloudTestUtils.clusterShape(5, 2, false, true));
+      CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+          CloudUtil.clusterShape(5, 2, false, true));
     }
     
     long waitForSeconds = 3 + random().nextInt(5);
@@ -901,8 +901,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
         "conf", 2, 2).setMaxShardsPerNode(2);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
-        CloudTestUtils.clusterShape(2, 2, false, true));
+    CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
+        CloudUtil.clusterShape(2, 2, false, true));
 
     long waitForSeconds = 3 + random().nextInt(5);
     Map<String, Object> props = createTriggerProps(waitForSeconds);
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledMaintenanceTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledMaintenanceTriggerTest.java
index 87b32d7..2fb82cd 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledMaintenanceTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledMaintenanceTriggerTest.java
@@ -34,8 +34,8 @@ import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
 import org.apache.solr.common.cloud.ClusterState;
@@ -206,8 +206,8 @@ public class ScheduledMaintenanceTriggerTest extends SolrCloudTestCase {
         "conf", 1, 1);
 
     create1.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, "failed to create " + collection1, collection1,
-        CloudTestUtils.clusterShape(1, 1));
+    CloudUtil.waitForState(cloudManager, "failed to create " + collection1, collection1,
+        CloudUtil.clusterShape(1, 1));
 
     // also create a very stale lock
     Map<String, Object> lockData = new HashMap<>();
@@ -257,8 +257,8 @@ public class ScheduledMaintenanceTriggerTest extends SolrCloudTestCase {
     CollectionAdminRequest.SplitShard split1 = CollectionAdminRequest.splitShard(collection1)
         .setShardName("shard1");
     split1.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, "failed to split " + collection1, collection1,
-        CloudTestUtils.clusterShape(3, 1, true, true));
+    CloudUtil.waitForState(cloudManager, "failed to split " + collection1, collection1,
+        CloudUtil.clusterShape(3, 1, true, true));
 
 
     await = triggerFired.await(90, TimeUnit.SECONDS);
@@ -309,7 +309,7 @@ public class ScheduledMaintenanceTriggerTest extends SolrCloudTestCase {
 
     ClusterState state = cloudManager.getClusterStateProvider().getClusterState();
 
-    CloudTestUtils.clusterShape(2, 1).matches(state.getLiveNodes(), state.getCollection(collection1));
+    CloudUtil.clusterShape(2, 1).matches(state.getLiveNodes(), state.getCollection(collection1));
   }
 
   public static CountDownLatch getTriggerFired() {
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
index a53a389..0c90548 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
@@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.cloud.Replica;
@@ -124,8 +125,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
         "conf", 1, 2);
     create.process(solrClient);
 
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 2));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 2));
 
     CloudTestUtils.assertAutoScalingRequest
       (cloudManager, 
@@ -276,8 +277,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
         "conf", 1, 2);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 2));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 2));
 
     // add a couple of spare replicas above RF. Use different types.
     // these additional replicas will be placed on other nodes in the cluster
@@ -285,8 +286,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.TLOG));
     solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.PULL));
 
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 5));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 5));
 
     CloudTestUtils.assertAutoScalingRequest
       (cloudManager, 
@@ -393,8 +394,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals("cold replicas", 3, coldReplicas.get());
 
     // now the collection should be down to RF = 2
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 2));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 2));
 
     listenerEvents.clear();
     listenerEventLatch = new CountDownLatch(3);
@@ -494,8 +495,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals("coldNodes: " +ops.toString(), 2, coldNodes2.get());
 
     // now the collection should be at RF == 1, with one additional PULL replica
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 1));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 1));
   }
 
   @Test
@@ -507,8 +508,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
         "conf", 1, 2);
 
     create.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 2));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 2));
 
     // add a couple of spare replicas above RF. Use different types to verify that only
     // searchable replicas are considered
@@ -517,8 +518,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.TLOG));
     solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.PULL));
 
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 5));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 5));
 
     CloudTestUtils.assertAutoScalingRequest
       (cloudManager, 
@@ -687,8 +688,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals(responses.toString(), 4, nodes.get());
 
     // we are left with one searchable replica
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 1));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 1));
   }
 
   public static class CapturingTriggerListener extends TriggerListenerBase {
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerTest.java
index f750a5e..d3b523d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerTest.java
@@ -39,7 +39,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.impl.SolrClientCloudManager;
 import org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.ZkDistributedQueueFactory;
 import org.apache.solr.common.cloud.SolrZkClient;
@@ -106,8 +106,8 @@ public class SearchRateTriggerTest extends SolrCloudTestCase {
     create.setMaxShardsPerNode(1);
     create.process(solrClient);
 
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 2));
-    CloudTestUtils.waitForState(cloudManager, COLL2, 60, TimeUnit.SECONDS, clusterShape(2, 2));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 2));
+    CloudUtil.waitForState(cloudManager, COLL2, 60, TimeUnit.SECONDS, clusterShape(2, 2));
 
     double rate = 1.0;
     URL baseUrl = targetNode.getBaseUrl();
@@ -238,7 +238,7 @@ public class SearchRateTriggerTest extends SolrCloudTestCase {
         "conf", 2, 2);
     create.setMaxShardsPerNode(1);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 4));
+    CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 4));
 
     long waitForSeconds = 5 + random().nextInt(5);
     Map<String, Object> props = createTriggerProps(Arrays.asList(COLL1, COLL2), waitForSeconds, 1.0, 0.1);
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
index 800af6b..2ddc92e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
 
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.Preference;
@@ -72,6 +73,7 @@ public class TestSimClusterStateProvider extends SolrCloudTestCase {
   @BeforeClass
   public static void setupCluster() throws Exception {
     simulated = random().nextBoolean();
+    simulated = true;
     log.info("####### Using simulated components? " + simulated);
 
     configureCluster(NODE_COUNT)
@@ -105,17 +107,17 @@ public class TestSimClusterStateProvider extends SolrCloudTestCase {
 
     if (simulated) {
       // initialize simulated provider
-      SimCloudManager simCloudManager = new SimCloudManager(TimeSource.get("simTime:10"));
-      simCloudManager.getSimClusterStateProvider().simSetClusterProperties(clusterProperties);
-      simCloudManager.getSimDistribStateManager().simSetAutoScalingConfig(autoScalingConfig);
-      nodeValues.forEach((n, values) -> {
-        try {
-          simCloudManager.getSimNodeStateProvider().simSetNodeValues(n, values);
-        } catch (InterruptedException e) {
-          fail("Interrupted:" + e);
-        }
-      });
-      simCloudManager.getSimClusterStateProvider().simSetClusterState(realState);
+      SimCloudManager simCloudManager = SimCloudManager.createCluster(realManager, TimeSource.get("simTime:10"));
+//      simCloudManager.getSimClusterStateProvider().simSetClusterProperties(clusterProperties);
+//      simCloudManager.getSimDistribStateManager().simSetAutoScalingConfig(autoScalingConfig);
+//      nodeValues.forEach((n, values) -> {
+//        try {
+//          simCloudManager.getSimNodeStateProvider().simSetNodeValues(n, values);
+//        } catch (InterruptedException e) {
+//          fail("Interrupted:" + e);
+//        }
+//      });
+//      simCloudManager.getSimClusterStateProvider().simSetClusterState(realState);
       ClusterState simState = simCloudManager.getClusterStateProvider().getClusterState();
       assertClusterStateEquals(realState, simState);
       cloudManager = simCloudManager;
@@ -137,12 +139,26 @@ public class TestSimClusterStateProvider extends SolrCloudTestCase {
         for (Replica oneReplica : slice.getReplicas()) {
           Replica twoReplica = sTwo.getReplica(oneReplica.getName());
           assertNotNull(twoReplica);
-          assertEquals(oneReplica, twoReplica);
+          assertReplicaEquals(oneReplica, twoReplica);
         }
       });
     });
   }
 
+  private static void assertReplicaEquals(Replica one, Replica two) {
+    assertEquals(one.getName(), two.getName());
+    assertEquals(one.getNodeName(), two.getNodeName());
+    assertEquals(one.getState(), two.getState());
+    assertEquals(one.getType(), two.getType());
+    Map<String, Object> filteredPropsOne = one.getProperties().entrySet().stream()
+        .filter(e -> !(e.getKey().startsWith("INDEX") || e.getKey().startsWith("QUERY") || e.getKey().startsWith("UPDATE")))
+        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+    Map<String, Object> filteredPropsTwo = two.getProperties().entrySet().stream()
+        .filter(e -> !(e.getKey().startsWith("INDEX") || e.getKey().startsWith("QUERY") || e.getKey().startsWith("UPDATE")))
+        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+    assertEquals(filteredPropsOne, filteredPropsTwo);
+  }
+
   private String addNode() throws Exception {
     JettySolrRunner solr = cluster.startJettySolrRunner();
     cluster.waitForAllNodes(30);
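
Two behavioral changes in this test: the simulated provider is now seeded from a
snapshot of the real cluster via SimCloudManager.createCluster(), and replicas
are compared field by field instead of with Replica equality, skipping the
INDEX*/QUERY*/UPDATE* properties that the simulator synthesizes with different
values. A sketch of that filtering step in isolation (the helper name is
illustrative):

    // keep only the replica properties that are stable across the real
    // and simulated providers
    static Map<String, Object> stableProps(Replica r) {
      return r.getProperties().entrySet().stream()
          .filter(e -> !(e.getKey().startsWith("INDEX")
              || e.getKey().startsWith("QUERY")
              || e.getKey().startsWith("UPDATE")))
          .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    }
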
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
index 19f9b8d..387072d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
@@ -33,8 +33,8 @@ import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.autoscaling.ActionContext;
 import org.apache.solr.cloud.autoscaling.ComputePlanAction;
 import org.apache.solr.cloud.autoscaling.ScheduledTriggers;
@@ -145,8 +145,8 @@ public class TestSimComputePlanAction extends SimSolrCloudTestCase {
         "conf",1, 2);
     create.process(solrClient);
 
-    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
-        "testNodeLost", CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        "testNodeLost", CloudUtil.clusterShape(1, 2, false, true));
 
     ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
     log.debug("-- cluster state: {}", clusterState);
@@ -211,8 +211,8 @@ public class TestSimComputePlanAction extends SimSolrCloudTestCase {
     create.setMaxShardsPerNode(2);
     create.process(solrClient);
 
-    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
-        "testNodeWithMultipleReplicasLost", CloudTestUtils.clusterShape(2, 3, false, true));
+    CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        "testNodeWithMultipleReplicasLost", CloudUtil.clusterShape(2, 3, false, true));
 
     ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
     log.debug("-- cluster state: {}", clusterState);
@@ -296,7 +296,7 @@ public class TestSimComputePlanAction extends SimSolrCloudTestCase {
         "conf",1, 4).setMaxShardsPerNode(-1);
     create.process(solrClient);
 
-    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+    CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
         "testNodeAdded", (liveNodes, collectionState) -> collectionState.getReplicas().stream().allMatch(replica -> replica.isActive(liveNodes)));
 
     // reset to the original policy which has only 1 replica per shard per node
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
index a0c18a9..f64dad2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
@@ -31,8 +31,8 @@ import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.autoscaling.ActionContext;
 import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
 import org.apache.solr.cloud.autoscaling.NodeLostTrigger;
@@ -90,8 +90,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
     create.setMaxShardsPerNode(1);
     create.process(solrClient);
 
-    log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
+    log.info("Collection ready after " + CloudUtil.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 2, false, true)) + "ms");
 
     String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
     ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
@@ -152,8 +152,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
       assertNotNull(response.get("success"));
     }
 
-    log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
+    log.info("Collection ready after " + CloudUtil.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 2, false, true)) + "ms");
   }
 
   @Test
@@ -181,8 +181,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
     create.setMaxShardsPerNode(1);
     create.process(solrClient);
 
-    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
-        collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        collectionName, CloudUtil.clusterShape(1, 2, false, true));
 
     String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
     ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
@@ -200,8 +200,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
 
     cluster.getTimeSource().sleep(3000);
 
-    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of collection to be 2 again",
-        collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudUtil.waitForState(cluster, "Timed out waiting for replicas of collection to be 2 again",
+        collectionName, CloudUtil.clusterShape(1, 2, false, true));
 
     clusterState = cluster.getClusterStateProvider().getClusterState();
     docCollection = clusterState.getCollection(collectionName);
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
index 6b87fba..654c29f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
@@ -26,8 +26,8 @@ import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;
@@ -101,9 +101,9 @@ public class TestSimExtremeIndexing extends SimSolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
         "conf", 2, 2).setMaxShardsPerNode(10);
     create.process(solrClient);
-    
-    CloudTestUtils.waitForState(cluster, collectionName, 90, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(2, 2, false, true));
+
+    CloudUtil.waitForState(cluster, collectionName, 90, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(2, 2, false, true));
 
     //long waitForSeconds = 3 + random().nextInt(5);
     long waitForSeconds = 1;
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
index ed160cf..1cf4f0b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
@@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.autoscaling.ActionContext;
 import org.apache.solr.cloud.autoscaling.CapturedEvent;
 import org.apache.solr.cloud.autoscaling.ComputePlanAction;
@@ -194,8 +195,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
     create.setCreateNodeSet(String.join(",", nodes));
     create.process(solrClient);
 
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
+        CloudUtil.clusterShape(5, 15, false, true)) + "ms");
 
     int KILL_NODES = 8;
     // kill off a number of nodes
@@ -203,8 +204,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
       cluster.simRemoveNode(nodes.get(i), false);
     }
     // should fully recover
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(5, 15, false, true)) + "ms");
 
     log.info("OP COUNTS: " + cluster.simGetOpCounts());
     long moveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
@@ -237,8 +238,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
       fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
     }
 
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
+        CloudUtil.clusterShape(5, 15, false, true)) + "ms");
     long newMoveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
     log.info("==== Flaky replicas: {}. Additional MOVEREPLICA count: {}", flakyReplicas, (newMoveReplicaOps - moveReplicaOps));
     // flaky nodes lead to a number of MOVEREPLICA that is non-zero but lower than the number of flaky replicas
@@ -274,8 +275,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
       // Since our current goal is to try and find situations where cores are just flat out missing
       // no matter how long we wait, let's be excessive and generous in our timeout.
       // (REMINDER: this uses the cluster's timesource, and ADDREPLICA has a hardcoded delay of 500ms)
-      CloudTestUtils.waitForState(cluster, name, 2 * totalCores, TimeUnit.SECONDS,
-                                  CloudTestUtils.clusterShape(numShards, repsPerShard, false, true));
+      CloudUtil.waitForState(cluster, name, 2 * totalCores, TimeUnit.SECONDS,
+          CloudUtil.clusterShape(numShards, repsPerShard, false, true));
       
       final CollectionAdminRequest.Delete delete = CollectionAdminRequest.deleteCollection(name);
       log.info("DELETE: {}", delete);
@@ -311,8 +312,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
     create.setAutoAddReplicas(false);
     create.process(solrClient);
 
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * NUM_NODES, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 90 * NUM_NODES, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
 
     // start adding nodes
     int numAddNode = NUM_NODES / 5;
@@ -361,8 +362,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
     log.info("1st check: lastNumOps (MOVEREPLICA) = {}", lastNumOps);
     assertTrue("no MOVEREPLICA ops?", lastNumOps > 0);
 
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
 
     int count = 1000;
     SolrInputDocument finishedEvent = null;
@@ -520,8 +521,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
     create.setAutoAddReplicas(false);
     create.process(solrClient);
 
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 60 * NUM_NODES, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 60 * NUM_NODES, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
 
     // start killing nodes
     int numNodes = NUM_NODES / 5;
@@ -604,8 +605,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
 
     if (listenerEvents.isEmpty()) {
       // no failed movements - verify collection shape
-      log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
-          CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
+      log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+          CloudUtil.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
     } else {
       cluster.getTimeSource().sleep(NUM_NODES * 100);
     }
@@ -663,8 +664,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
         "conf", 2, 10);
     create.process(solrClient);
 
-    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(2, 10, false, true)) + " ms");
+    log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(2, 10, false, true)) + " ms");
 
     // collect the node names for shard1
     Set<String> nodes = new HashSet<>();
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
index 0dcea03..36c94a0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
@@ -36,8 +36,8 @@ import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
 import org.apache.solr.client.solrj.cloud.autoscaling.Row;
 import org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkStateReader;
@@ -75,8 +75,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     CollectionAdminRequest.createCollection("perReplicaDataColl", "conf", 1, 5)
         .process(solrClient);
 
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "perReplicaDataColl",
-        CloudTestUtils.clusterShape(1, 5, false, true));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "perReplicaDataColl",
+        CloudUtil.clusterShape(1, 5, false, true));
     DocCollection coll = getCollectionState("perReplicaDataColl");
     String autoScaleJson = "{" +
         "  'cluster-preferences': [" +
@@ -126,13 +126,13 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1)
         .setPolicy("c1")
         .process(solrClient);
-    CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(1, 1, false, true));
+    CloudUtil.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
+        CloudUtil.clusterShape(1, 1, false, true));
 
     getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
 
     CollectionAdminRequest.addReplicaToShard(collectionName, "shard1").process(solrClient);
-    CloudTestUtils.waitForState(cluster,
+    CloudUtil.waitForState(cluster,
         collectionName, 120l, TimeUnit.SECONDS,
         (liveNodes, collectionState) -> collectionState.getReplicas().size() == 2);
 
@@ -160,8 +160,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
         .setPolicy("c1")
         .process(solrClient);
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", collectionName,
-        CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", collectionName,
+        CloudUtil.clusterShape(1, 2, false, true));
 
     DocCollection docCollection = getCollectionState(collectionName);
     List<Replica> list = docCollection.getReplicas(firstNode);
@@ -174,7 +174,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
 
     CollectionAdminRequest.splitShard(collectionName).setShardName("shard1").process(solrClient);
 
-    CloudTestUtils.waitForState(cluster, "Timed out waiting to see 6 replicas for collection: " + collectionName,
+    CloudUtil.waitForState(cluster, "Timed out waiting to see 6 replicas for collection: " + collectionName,
         collectionName, (liveNodes, collectionState) -> collectionState.getReplicas().size() == 6);
 
     docCollection = getCollectionState(collectionName);
@@ -219,8 +219,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     //org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses
     CollectionAdminRequest.createCollection("metricsTest", "conf", 1, 1)
         .process(solrClient);
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "metricsTest",
-        CloudTestUtils.clusterShape(1, 1));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "metricsTest",
+        CloudUtil.clusterShape(1, 1));
 
     DocCollection collection = getCollectionState("metricsTest");
     List<String> tags = Arrays.asList("metrics:solr.node:ADMIN./admin/authorization.clientErrors:count",
@@ -266,8 +266,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "s1", 1, 1, 1)
         .setMaxShardsPerNode(-1)
         .process(solrClient);
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
-        CloudTestUtils.clusterShape(1, 3, false, true));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
+        CloudUtil.clusterShape(1, 3, false, true));
 
     DocCollection coll = getCollectionState("policiesTest");
 
@@ -311,16 +311,16 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "s1,s2", 1)
         .setPolicy("c1")
         .process(solrClient);
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
-        CloudTestUtils.clusterShape(2, 1));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
+        CloudUtil.clusterShape(2, 1));
 
     DocCollection coll = getCollectionState("policiesTest");
     assertEquals("c1", coll.getPolicyName());
     assertEquals(2,coll.getReplicas().size());
     coll.forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
     CollectionAdminRequest.createShard("policiesTest", "s3").process(solrClient);
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
-        CloudTestUtils.clusterShape(3, 1));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
+        CloudUtil.clusterShape(3, 1));
 
     coll = getCollectionState("policiesTest");
     assertEquals(1, coll.getSlice("s3").getReplicas().size());
@@ -331,8 +331,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     SolrClient solrClient = cluster.simGetSolrClient();
     CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "shard1", 2)
         .process(solrClient);
-    CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
-        CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
+        CloudUtil.clusterShape(1, 2, false, true));
     DocCollection rulesCollection = getCollectionState("policiesTest");
 
     Map<String, Object> val = cluster.getNodeStateProvider().getNodeValues(rulesCollection.getReplicas().get(0).getNodeName(), Arrays.asList(
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
index f680781..ea645c6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
@@ -42,6 +42,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.autoscaling.ActionContext;
 import org.apache.solr.cloud.autoscaling.AutoScaling;
 import org.apache.solr.cloud.autoscaling.CapturedEvent;
@@ -1343,8 +1344,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
         "conf", 1, 2);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cluster, "searchRate testing collection creating",
-                                COLL1, CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudUtil.waitForState(cluster, "searchRate testing collection creating",
+                                COLL1, CloudUtil.clusterShape(1, 2, false, true));
 
     listenerEventLatch = new CountDownLatch(4);
     
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
index 2f55c7b..4c058e5 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHistoryHandlerTest.java
@@ -25,7 +25,7 @@ import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.CloudUtil;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
 import org.apache.solr.common.params.CollectionAdminParams;
@@ -96,8 +96,8 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL,
         "conf", 1, 1);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cloudManager, "failed to create " + CollectionAdminParams.SYSTEM_COLL,
-        CollectionAdminParams.SYSTEM_COLL, CloudTestUtils.clusterShape(1, 1));
+    CloudUtil.waitForState(cloudManager, "failed to create " + CollectionAdminParams.SYSTEM_COLL,
+        CollectionAdminParams.SYSTEM_COLL, CloudUtil.clusterShape(1, 1));
   }
 
   @AfterClass
diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling-policy-preferences.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling-policy-preferences.adoc
index c421703..21b6635 100644
--- a/solr/solr-ref-guide/src/solrcloud-autoscaling-policy-preferences.adoc
+++ b/solr/solr-ref-guide/src/solrcloud-autoscaling-policy-preferences.adoc
@@ -447,3 +447,30 @@ Since there is no node that can host a replica for `shard2` without causing a vi
 [ {"cores": "<3", "node": "#ANY"} ]
 
 After re-issuing the `SecondCollection` CREATE command, the replica for `shard1` will be placed on "nodeA": it's least loaded, so is tested first, and no policy violation will result from placement there.  The `shard2` replica could be placed on any of the 3 nodes, since they're all equally loaded, and the chosen node will remain below its maximum core count after placement.  The CREATE command succeeds.
+
+== Testing the autoscaling configuration and suggestions
+It's not always easy to predict the impact of autoscaling configuration changes on the
+cluster layout. Starting with release 8.1, Solr provides a tool for assessing the impact of
+such changes without affecting the state of the target cluster.
+
+This testing tool is part of the `bin/solr autoscaling` command. In addition to other
+options that provide detailed status of the current cluster layout, the following options
+specifically allow users to test new autoscaling configurations and run "what if" scenarios:
+
+`-a <CONFIG>`::
+JSON file containing the autoscaling configuration to test. This file needs to be in the same
+format as the result of the `/solr/admin/autoscaling` call (see the sketch below). If this
+parameter is missing, the currently deployed autoscaling configuration is used.
+
+`-simulate`::
+Simulate the effects of applying all autoscaling suggestions on the cluster layout. NOTE: this does
+not affect the actual cluster in any way; this option uses the simulation framework to calculate the
+new layout without actually making the changes. Calculations are performed in the tool's JVM, so they
+don't affect the performance of the running cluster either. This process is repeated several times
+until a limit is reached or there are no more suggestions left to apply (although unresolved violations may still remain).
+
+`-i <NUMBER>`::
+Number of iterations of the simulation loop. Default is 10.
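+
+As a minimal sketch of the expected file format for `-a` (the keys shown are standard
+autoscaling configuration sections; the values are purely illustrative):
+
+[source,json]
+----
+{
+  "cluster-preferences": [ {"minimize": "cores"} ],
+  "cluster-policy": [ {"cores": "<10", "node": "#ANY"} ]
+}
+----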
+
+Results of the simulation contain the initial suggestions, the suggestions applied at each
+step of the simulation, and the final simulated state of the cluster.
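+
+As a quick, illustrative example (the file name and iteration count are placeholders;
+connection options follow the usual `bin/solr` conventions), a "what if" run that tests a
+proposed configuration and simulates up to 20 iterations of applying suggestions:
+
+[source,bash]
+----
+bin/solr autoscaling -a proposed-autoscaling.json -simulate -i 20
+----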
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistribStateManager.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistribStateManager.java
index b6fb558..f811c5a 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistribStateManager.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistribStateManager.java
@@ -103,7 +103,7 @@ public interface DistribStateManager extends SolrCloseable {
       }
       List<String> children = listData(node);
       for (final String child : children) {
-        final String childPath = node + "/" + child;
+        final String childPath = node + (node.equals("/") ? "" : "/") + child;
         queue.add(childPath);
         tree.add(childPath);
       }
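
Aside: a minimal, hypothetical Java sketch (not part of the patch) showing why the one-line
fix above is needed when the tree walk starts at the ZooKeeper root, where naive
concatenation produces a malformed "//" path:

    // Illustrates the childPath construction before and after the fix.
    public class ChildPathDemo {
      public static void main(String[] args) {
        String node = "/";
        String child = "configs";
        System.out.println(node + "/" + child);                           // "//configs" -- malformed
        System.out.println(node + (node.equals("/") ? "" : "/") + child); // "/configs"  -- correct
      }
    }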
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/VersionedData.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/VersionedData.java
index 63341b9..e81d50b 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/VersionedData.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/VersionedData.java
@@ -16,6 +16,8 @@
  */
 package org.apache.solr.client.solrj.cloud.autoscaling;
 
+import org.apache.zookeeper.CreateMode;
+
 /**
  * Immutable representation of binary data with version.
  */
@@ -23,16 +25,19 @@ public class VersionedData {
   private final int version;
   private final byte[] data;
   private final String owner;
+  private final CreateMode mode;
 
   /**
    * Constructor.
    * @param version version of the data, or -1 if unknown
    * @param data binary data, or null.
+   * @param mode create mode
    * @param owner symbolic identifier of data owner / creator, or null.
    */
-  public VersionedData(int version, byte[] data, String owner) {
+  public VersionedData(int version, byte[] data, CreateMode mode, String owner) {
     this.version = version;
     this.data = data;
+    this.mode = mode;
     this.owner = owner;
   }
 
@@ -44,6 +49,10 @@ public class VersionedData {
     return data;
   }
 
+  public CreateMode getMode() {
+    return mode;
+  }
+
   public String getOwner() {
     return owner;
   }
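
Aside: a hedged sketch of a caller adapting to the new four-argument constructor (the
version, payload, and owner values below are illustrative, not taken from the patch):

    import java.nio.charset.StandardCharsets;
    import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
    import org.apache.zookeeper.CreateMode;

    public class VersionedDataDemo {
      public static void main(String[] args) {
        byte[] data = "state".getBytes(StandardCharsets.UTF_8);
        VersionedData vd = new VersionedData(3, data, CreateMode.PERSISTENT, "0");
        System.out.println(vd.getMode()); // prints PERSISTENT
      }
    }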
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkDistribStateManager.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkDistribStateManager.java
index 77bd84c..e2834db 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkDistribStateManager.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkDistribStateManager.java
@@ -85,7 +85,9 @@ public class ZkDistribStateManager implements DistribStateManager {
     Stat stat = new Stat();
     try {
       byte[] bytes = zkClient.getData(path, watcher, stat, true);
-      return new VersionedData(stat.getVersion(), bytes, String.valueOf(stat.getEphemeralOwner()));
+      return new VersionedData(stat.getVersion(), bytes,
+          stat.getEphemeralOwner() != 0 ? CreateMode.EPHEMERAL : CreateMode.PERSISTENT,
+          String.valueOf(stat.getEphemeralOwner()));
     } catch (KeeperException.NoNodeException e) {
       throw new NoSuchElementException(path);
     } catch (InterruptedException e) {