You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/09/06 14:36:01 UTC

[4/7] lucene-solr:jira/solr-12709: SOLR-12723: Performance improvements in the simulator.

SOLR-12723: Performance improvements in the simulator.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4a1ee8e1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4a1ee8e1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4a1ee8e1

Branch: refs/heads/jira/solr-12709
Commit: 4a1ee8e13b7c19d3dd80c361a6023dc55aa109fa
Parents: b56095b
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Tue Sep 4 16:11:39 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Tue Sep 4 16:11:39 2018 +0200

----------------------------------------------------------------------
 .../cloud/autoscaling/ComputePlanAction.java    |   6 +-
 .../org/apache/solr/cloud/CloudTestUtils.java   |   6 +-
 .../cloud/autoscaling/sim/SimCloudManager.java  |   8 +
 .../sim/SimClusterStateProvider.java            | 408 +++++++++++--------
 .../autoscaling/sim/SimNodeStateProvider.java   |  21 +-
 .../autoscaling/sim/TestSimAutoScaling.java     |  22 +-
 .../sim/TestSimExecutePlanAction.java           |   4 +-
 .../autoscaling/sim/TestSimNodeLostTrigger.java |   2 +-
 .../autoscaling/sim/TestSimPolicyCloud.java     |  10 +-
 .../sim/TestSimTriggerIntegration.java          |   8 +-
 .../solrj/cloud/autoscaling/ReplicaInfo.java    |   4 +-
 11 files changed, 280 insertions(+), 219 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index 923a27a..5d211d2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -171,7 +171,11 @@ public class ComputePlanAction extends TriggerActionBase {
     clusterState.forEachCollection(coll -> {
       Integer rf = coll.getReplicationFactor();
       if (rf == null) {
-        rf = coll.getReplicas().size() / coll.getSlices().size();
+        if (coll.getSlices().isEmpty()) {
+          rf = 1; // ???
+        } else {
+          rf = coll.getReplicas().size() / coll.getSlices().size();
+        }
       }
       totalRF.addAndGet(rf * coll.getSlices().size());
     });

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java b/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
index b67b551..eb50b96 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
@@ -109,7 +109,7 @@ public class CloudTestUtils {
         log.trace("-- still not matching predicate: {}", state);
       }
     }
-    throw new TimeoutException("last state: " + coll);
+    throw new TimeoutException("last ClusterState: " + state + ", last coll state: " + coll);
   }
 
   /**
@@ -141,13 +141,13 @@ public class CloudTestUtils {
       }
       Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
       if (slices.size() != expectedShards) {
-        log.trace("-- wrong number of active slices, expected={}, found={}", expectedShards, collectionState.getSlices().size());
+        log.trace("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
         return false;
       }
       Set<String> leaderless = new HashSet<>();
       for (Slice slice : slices) {
         int activeReplicas = 0;
-        if (requireLeaders && slice.getLeader() == null) {
+        if (requireLeaders && slice.getState() != Slice.State.INACTIVE && slice.getLeader() == null) {
           leaderless.add(slice.getName());
           continue;
         }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index 1f0b6cf..51e3db4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -37,6 +37,7 @@ import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
+import com.carrotsearch.randomizedtesting.RandomizedContext;
 import com.codahale.metrics.jvm.ClassLoadingGaugeSet;
 import com.codahale.metrics.jvm.GarbageCollectorMetricSet;
 import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
@@ -386,6 +387,13 @@ public class SimCloudManager implements SolrCloudManager {
   }
 
   /**
+   * Get the source of randomness (usually initialized by the test suite).
+   */
+  public Random getRandom() {
+    return RandomizedContext.current().getRandom();
+  }
+
+  /**
    * Add a new node and initialize its node values (metrics). The
    * /live_nodes list is updated with the new node id.
    * @return new node id

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index f261745..3dd26e9 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -232,6 +232,14 @@ public class SimClusterStateProvider implements ClusterStateProvider {
 
   /**
    * Get random node id.
+   * @return one of the live nodes
+   */
+  public String simGetRandomNode() {
+    return simGetRandomNode(cloudManager.getRandom());
+  }
+
+  /**
+   * Get random node id.
    * @param random instance of random.
    * @return one of the live nodes
    */
@@ -637,7 +645,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
           log.trace("-- no replicas in {} / {}", dc.getName(), s.getName());
           return;
         }
-        log.debug("-- submit leader election for {} / {}", dc.getName(), s.getName());
+        log.trace("-- submit leader election for {} / {}", dc.getName(), s.getName());
         cloudManager.submit(() -> {
           simRunLeaderElection(dc.getName(), s, saveClusterState);
           return true;
@@ -652,7 +660,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     if (leader == null || !liveNodes.contains(leader.getNodeName())) {
       log.trace("Running leader election for {} / {}", collection, s.getName());
       if (s.getReplicas().isEmpty()) { // no replicas - punt
-        log.debug("-- no replicas in {} / {}", collection, s.getName());
+        log.trace("-- no replicas in {} / {}", collection, s.getName());
         return;
       }
       ActionThrottle lt = getThrottle(collection, s.getName());
@@ -686,7 +694,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
           }
         });
         if (alreadyHasLeader.get()) {
-          log.debug("-- already has leader {} / {}: {}", collection, s.getName(), s);
+          log.trace("-- already has leader {} / {}: {}", collection, s.getName(), s);
           return;
         }
         if (active.isEmpty()) {
@@ -712,8 +720,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
         synchronized (ri) {
           ri.getVariables().put(ZkStateReader.LEADER_PROP, "true");
         }
+        log.trace("-- elected new leader for {} / {}: {}", collection, s.getName(), ri);
         stateChanged.set(true);
-        log.debug("-- elected new leader for " + collection + " / " + s.getName() + ": " + ri.getName());
       }
     } else {
       log.trace("-- already has leader for {} / {}", collection, s.getName());
@@ -1084,17 +1092,24 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     sliceName.set(message.getStr(SHARD_ID_PROP));
     String splitKey = message.getStr("split.key");
 
+    ClusterState clusterState = getClusterState();
+    DocCollection collection = clusterState.getCollection(collectionName);
+    Slice parentSlice = SplitShardCmd.getParentSlice(clusterState, collectionName, sliceName, splitKey);
+    Replica leader = parentSlice.getLeader();
+    // XXX leader election may not have happened yet - should we require it?
+    if (leader == null) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Shard " + collectionName +
+          " /  " + sliceName.get() + " has no leader and can't be split");
+    }
     // start counting buffered updates and
-    // always invalidate cached collection states to get up-to-date metrics
     Map<String, Object> props = sliceProperties.computeIfAbsent(collectionName, c -> new ConcurrentHashMap<>())
         .computeIfAbsent(sliceName.get(), ss -> new ConcurrentHashMap<>());
+    if (props.containsKey(BUFFERED_UPDATES)) {
+      log.trace("--- SOLR-12729: Overlapping splitShard commands for {} / {}", collectionName, sliceName.get());
+      return;
+    }
     props.put(BUFFERED_UPDATES, new AtomicLong());
 
-    collectionsStatesRef.set(null);
-
-    ClusterState clusterState = getClusterState();
-    DocCollection collection = clusterState.getCollection(collectionName);
-    Slice parentSlice = SplitShardCmd.getParentSlice(clusterState, collectionName, sliceName, splitKey);
     List<DocRouter.Range> subRanges = new ArrayList<>();
     List<String> subSlices = new ArrayList<>();
     List<String> subShardNames = new ArrayList<>();
@@ -1115,12 +1130,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     if (sessionWrapper != null) sessionWrapper.release();
 
     // adjust numDocs / deletedDocs / maxDoc
-    Replica leader = parentSlice.getLeader();
-    // XXX leader election may not have happened yet - should we require it?
-    if (leader == null) {
-      leader = parentSlice.getReplicas().iterator().next();
-    }
-    String numDocsStr = leader.getStr("SEARCHER.searcher.numDocs", "0");
+    String numDocsStr = String.valueOf(getReplicaInfo(leader).getVariable("SEARCHER.searcher.numDocs", "0"));
     long numDocs = Long.parseLong(numDocsStr);
     long newNumDocs = numDocs / subSlices.size();
     long remainderDocs = numDocs % subSlices.size();
@@ -1128,6 +1138,18 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     long remainderIndexSize = SimCloudManager.DEFAULT_IDX_SIZE_BYTES + remainderDocs * DEFAULT_DOC_SIZE_BYTES;
     String remainderSlice = null;
 
+    // add slice props
+    for (int i = 0; i < subRanges.size(); i++) {
+      String subSlice = subSlices.get(i);
+      DocRouter.Range range = subRanges.get(i);
+      Map<String, Object> sliceProps = sliceProperties.computeIfAbsent(collectionName, c -> new ConcurrentHashMap<>())
+          .computeIfAbsent(subSlice, ss -> new ConcurrentHashMap<>());
+      sliceProps.put(Slice.RANGE, range);
+      sliceProps.put(Slice.PARENT, sliceName.get());
+      sliceProps.put(ZkStateReader.STATE_PROP, Slice.State.CONSTRUCTION.toString());
+      sliceProps.put(ZkStateReader.STATE_TIMESTAMP_PROP, String.valueOf(cloudManager.getTimeSource().getEpochTimeNs()));
+    }
+    // add replicas
     for (ReplicaPosition replicaPosition : replicaPositions) {
       String subSliceName = replicaPosition.shard;
       String subShardNodeName = replicaPosition.node;
@@ -1158,25 +1180,27 @@ public class SimClusterStateProvider implements ClusterStateProvider {
           solrCoreName, collectionName, replicaPosition.shard, replicaPosition.type, subShardNodeName, replicaProps);
       simAddReplica(replicaPosition.node, ri, false);
     }
-    // add slice props
-    for (int i = 0; i < subRanges.size(); i++) {
-      String subSlice = subSlices.get(i);
-      DocRouter.Range range = subRanges.get(i);
-      Map<String, Object> sliceProps = sliceProperties.computeIfAbsent(collectionName, c -> new ConcurrentHashMap<>())
-          .computeIfAbsent(subSlice, ss -> new ConcurrentHashMap<>());
-      sliceProps.put(Slice.RANGE, range);
-      sliceProps.put(Slice.PARENT, sliceName.get());
-      sliceProps.put(ZkStateReader.STATE_PROP, Slice.State.CONSTRUCTION.toString());
-      sliceProps.put(ZkStateReader.STATE_TIMESTAMP_PROP, String.valueOf(cloudManager.getTimeSource().getEpochTimeNs()));
-    }
-    collectionsStatesRef.set(null);
     simRunLeaderElection(Collections.singleton(collectionName), true);
 
-    CloudTestUtils.waitForState(cloudManager, collectionName, 20, TimeUnit.SECONDS,
-        CloudTestUtils.clusterShape(collection.getSlices().size() + subShardNames.size(),
-            repFactor, true, false));
+    // delay it once again to better simulate replica recoveries
+    //opDelay(collectionName, CollectionParams.CollectionAction.SPLITSHARD.name());
+
+    CloudTestUtils.waitForState(cloudManager, collectionName, 20, TimeUnit.SECONDS, (liveNodes, state) -> {
+      for (String subSlice : subSlices) {
+        Slice s = state.getSlice(subSlice);
+        if (s.getLeader() == null) {
+          log.debug("** no leader in {} / {}", collectionName, s);
+          return false;
+        }
+        if (s.getReplicas().size() < repFactor) {
+          log.debug("** expected {} repFactor but there are {} replicas", repFactor, s.getReplicas().size());
+          return false;
+        }
+      }
+      return true;
+    });
     // mark the new slices as active and the old slice as inactive
-    log.debug("-- switching slice states after split shard: collection={}, parent={}, subSlices={}", collectionName,
+    log.trace("-- switching slice states after split shard: collection={}, parent={}, subSlices={}", collectionName,
         sliceName.get(), subSlices);
     lock.lockInterruptibly();
     try {
@@ -1216,9 +1240,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     } finally {
       lock.unlock();
     }
-//    cloudManager.submit(() -> {
-//      return true;
-//    });
     results.add("success", "");
 
   }
@@ -1321,158 +1342,176 @@ public class SimClusterStateProvider implements ClusterStateProvider {
       }
     }
 
-    boolean modified = false;
-
-    lock.lockInterruptibly();
-    try {
-      // always reset first to get the current metrics - it's easier than to keep matching
-      // Replica with ReplicaInfo where the current real counts are stored
-      collectionsStatesRef.set(null);
-      DocCollection coll = getClusterState().getCollection(collection);
-      DocRouter router = coll.getRouter();
-      List<String> deletes = req.getDeleteById();
-      if (deletes != null && !deletes.isEmpty()) {
-        for (String id : deletes) {
-          Slice s = router.getTargetSlice(id, null, null, req.getParams(), coll);
-          // NOTE: we don't use getProperty because it uses PROPERTY_PROP_PREFIX
+    DocCollection coll = getClusterState().getCollection(collection);
+    DocRouter router = coll.getRouter();
+    List<String> deletes = req.getDeleteById();
+    if (deletes != null && !deletes.isEmpty()) {
+      for (String id : deletes) {
+        Slice s = router.getTargetSlice(id, null, null, req.getParams(), coll);
+        // NOTE: we don't use getProperty because it uses PROPERTY_PROP_PREFIX
+        Replica leader = s.getLeader();
+        if (leader == null) {
+          log.debug("-- no leader in " + s);
+          continue;
+        }
+        cloudManager.getMetricManager().registry(createRegistryName(collection, s.getName(), leader)).counter("UPDATE./update.requests").inc();
+        ReplicaInfo ri = getReplicaInfo(leader);
+        Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
+        if (numDocs == null || numDocs.intValue() <= 0) {
+          log.debug("-- attempting to delete nonexistent doc " + id + " from " + s.getLeader());
+          continue;
+        }
+        AtomicLong bufferedUpdates = (AtomicLong)s.getProperties().get(BUFFERED_UPDATES);
+        if (bufferedUpdates != null) {
+          if (bufferedUpdates.get() > 0) {
+            bufferedUpdates.decrementAndGet();
+          } else {
+            log.debug("-- attempting to delete nonexistent buffered doc " + id + " from " + s.getLeader());
+          }
+          continue;
+        }
+        lock.lockInterruptibly();
+        try {
+          simSetShardValue(collection, s.getName(), "SEARCHER.searcher.deletedDocs", 1, true, false);
+          simSetShardValue(collection, s.getName(), "SEARCHER.searcher.numDocs", -1, true, false);
+          Number indexSize = (Number)ri.getVariable(Type.CORE_IDX.metricsAttribute);
+          if (indexSize != null && indexSize.longValue() > SimCloudManager.DEFAULT_IDX_SIZE_BYTES) {
+            indexSize = indexSize.longValue() - DEFAULT_DOC_SIZE_BYTES;
+            simSetShardValue(collection, s.getName(), Type.CORE_IDX.metricsAttribute,
+                new AtomicLong(indexSize.longValue()), false, false);
+            simSetShardValue(collection, s.getName(), Variable.coreidxsize,
+                new AtomicDouble((Double)Type.CORE_IDX.convertVal(indexSize)), false, false);
+          } else {
+            throw new Exception("unexpected indexSize ri=" + ri);
+          }
+        } catch (Exception e) {
+          throw new IOException(e);
+        } finally {
+          lock.unlock();
+        }
+      }
+    }
+    deletes = req.getDeleteQuery();
+    if (deletes != null && !deletes.isEmpty()) {
+      for (String q : deletes) {
+        if (!"*:*".equals(q)) {
+          throw new UnsupportedOperationException("Only '*:*' query is supported in deleteByQuery");
+        }
+        for (Slice s : coll.getSlices()) {
           Replica leader = s.getLeader();
           if (leader == null) {
             log.debug("-- no leader in " + s);
             continue;
           }
+
           cloudManager.getMetricManager().registry(createRegistryName(collection, s.getName(), leader)).counter("UPDATE./update.requests").inc();
           ReplicaInfo ri = getReplicaInfo(leader);
           Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
-          if (numDocs == null || numDocs.intValue() <= 0) {
-            log.debug("-- attempting to delete nonexistent doc " + id + " from " + s.getLeader());
-            continue;
-          }
-          AtomicLong bufferedUpdates = (AtomicLong)s.getProperties().get(BUFFERED_UPDATES);
-          if (bufferedUpdates != null) {
-            if (bufferedUpdates.get() > 0) {
-              bufferedUpdates.decrementAndGet();
-            } else {
-              log.debug("-- attempting to delete nonexistent buffered doc " + id + " from " + s.getLeader());
-            }
+          if (numDocs == null || numDocs.intValue() == 0) {
             continue;
           }
-          modified = true;
+          lock.lockInterruptibly();
           try {
-            simSetShardValue(collection, s.getName(), "SEARCHER.searcher.deletedDocs", 1, true, false);
-            simSetShardValue(collection, s.getName(), "SEARCHER.searcher.numDocs", -1, true, false);
-            Number indexSize = (Number)ri.getVariable(Type.CORE_IDX.metricsAttribute);
-            if (indexSize != null && indexSize.longValue() > SimCloudManager.DEFAULT_IDX_SIZE_BYTES) {
-              indexSize = indexSize.longValue() - DEFAULT_DOC_SIZE_BYTES;
-              simSetShardValue(collection, s.getName(), Type.CORE_IDX.metricsAttribute,
-                  new AtomicLong(indexSize.longValue()), false, false);
-              simSetShardValue(collection, s.getName(), Variable.coreidxsize,
-                  new AtomicDouble((Double)Type.CORE_IDX.convertVal(indexSize)), false, false);
-            } else {
-              throw new Exception("unexpected indexSize ri=" + ri);
-            }
+            simSetShardValue(collection, s.getName(), "SEARCHER.searcher.deletedDocs", new AtomicLong(numDocs.longValue()), false, false);
+            simSetShardValue(collection, s.getName(), "SEARCHER.searcher.numDocs", new AtomicLong(0), false, false);
+            simSetShardValue(collection, s.getName(), Type.CORE_IDX.metricsAttribute,
+                new AtomicLong(SimCloudManager.DEFAULT_IDX_SIZE_BYTES), false, false);
+            simSetShardValue(collection, s.getName(), Variable.coreidxsize,
+                new AtomicDouble((Double)Type.CORE_IDX.convertVal(SimCloudManager.DEFAULT_IDX_SIZE_BYTES)), false, false);
           } catch (Exception e) {
             throw new IOException(e);
+          } finally {
+            lock.unlock();
           }
         }
       }
-      deletes = req.getDeleteQuery();
-      if (deletes != null && !deletes.isEmpty()) {
-        for (String q : deletes) {
-          if (!"*:*".equals(q)) {
-            throw new UnsupportedOperationException("Only '*:*' query is supported in deleteByQuery");
-          }
-          for (Slice s : coll.getSlices()) {
-            Replica leader = s.getLeader();
-            if (leader == null) {
-              log.debug("-- no leader in " + s);
-              continue;
-            }
-
-            cloudManager.getMetricManager().registry(createRegistryName(collection, s.getName(), leader)).counter("UPDATE./update.requests").inc();
-            ReplicaInfo ri = getReplicaInfo(leader);
-            Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
-            if (numDocs == null || numDocs.intValue() == 0) {
-              continue;
-            }
-            modified = true;
-            try {
-              simSetShardValue(collection, s.getName(), "SEARCHER.searcher.deletedDocs", new AtomicLong(numDocs.longValue()), false, false);
-              simSetShardValue(collection, s.getName(), "SEARCHER.searcher.numDocs", new AtomicLong(0), false, false);
-              simSetShardValue(collection, s.getName(), Type.CORE_IDX.metricsAttribute,
-                  new AtomicLong(SimCloudManager.DEFAULT_IDX_SIZE_BYTES), false, false);
-              simSetShardValue(collection, s.getName(), Variable.coreidxsize,
-                  new AtomicDouble((Double)Type.CORE_IDX.convertVal(SimCloudManager.DEFAULT_IDX_SIZE_BYTES)), false, false);
-            } catch (Exception e) {
-              throw new IOException(e);
-            }
-          }
-        }
+    }
+    List<SolrInputDocument> docs = req.getDocuments();
+    Iterator<SolrInputDocument> it;
+    if (docs != null) {
+      it = docs.iterator();
+    } else {
+      it = req.getDocIterator();
+    }
+    if (it != null) {
+      // this approach to updating counters and metrics drastically increases performance
+      // of bulk updates, because simSetShardValue is relatively costly
+
+      // also, skip the hash-based selection of slices in favor of a simple random
+      // start + round-robin assignment, because we don't keep individual id-s anyway
+      Map<String, AtomicLong> docUpdates = new HashMap<>();
+      Map<String, Map<String, AtomicLong>> metricUpdates = new HashMap<>();
+      Slice[] slices = coll.getActiveSlicesArr();
+      if (slices.length == 0) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Update sent to a collection without slices: " + coll);
       }
-      List<SolrInputDocument> docs = req.getDocuments();
-      Iterator<SolrInputDocument> it;
-      if (docs != null) {
-        it = docs.iterator();
-      } else {
-        it = req.getDocIterator();
+      // TODO: we don't use DocRouter so we should verify that active slices cover the whole hash range
+
+      long docCount = 0;
+      while (it.hasNext()) {
+        SolrInputDocument doc = it.next();
+        String id = (String) doc.getFieldValue("id");
+        if (id == null) {
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Document without id: " + doc);
+        }
+        docCount++;
       }
-      if (it != null) {
-        // this approach to updating metrics drastically increases performance
-        // of bulk updates, because simSetShardValue is relatively costly
-        Map<String, AtomicLong> docUpdates = new HashMap<>();
-        Map<String, Map<String, AtomicLong>> metricUpdates = new HashMap<>();
-        while (it.hasNext()) {
-          SolrInputDocument doc = it.next();
-          String id = (String) doc.getFieldValue("id");
-          if (id == null) {
-            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Document without id: " + doc);
-          }
-          Slice s = router.getTargetSlice(id, null, null, req.getParams(), coll);
-          if (s.getState() != Slice.State.ACTIVE) {
-            log.debug("-- slice not active: {}", s);
-          }
-          Replica leader = s.getLeader();
-          if (leader == null) {
-            log.debug("-- no leader in " + s);
-            continue;
-          }
-          metricUpdates.computeIfAbsent(s.getName(), sh -> new HashMap<>())
-              .computeIfAbsent(leader.getCoreName(), cn -> new AtomicLong())
-              .incrementAndGet();
-          AtomicLong bufferedUpdates = (AtomicLong)s.getProperties().get(BUFFERED_UPDATES);
-          if (bufferedUpdates != null) {
-            bufferedUpdates.incrementAndGet();
-            continue;
-          }
-          modified = true;
-          docUpdates.computeIfAbsent(s.getName(), sh -> new AtomicLong())
-              .incrementAndGet();
+      long perSlice = docCount / slices.length;
+      long remainder = docCount % slices.length;
+      int initialSlice = cloudManager.getRandom().nextInt(slices.length);
+      for (int i = 0; i < slices.length; i++) {
+        long addDocs = perSlice;
+        if (i == 0) {
+          addDocs += remainder;
         }
-        docUpdates.forEach((sh, count) -> {
-          try {
-            simSetShardValue(collection, sh, "SEARCHER.searcher.numDocs", count.get(), true, false);
-            simSetShardValue(collection, sh, "SEARCHER.searcher.maxDoc", count.get(), true, false);
-            // for each new document increase the size by DEFAULT_DOC_SIZE_BYTES
-            simSetShardValue(collection, sh, Type.CORE_IDX.metricsAttribute,
-                DEFAULT_DOC_SIZE_BYTES * count.get(), true, false);
-            simSetShardValue(collection, sh, Variable.coreidxsize,
-                Type.CORE_IDX.convertVal(DEFAULT_DOC_SIZE_BYTES * count.get()), true, false);
-          } catch (Exception e) {
-            throw new RuntimeException(e);
-          }
-        });
-        metricUpdates.forEach((sh, cores) -> {
-          cores.forEach((core, count) -> {
-            String registry = SolrMetricManager.getRegistryName(SolrInfoBean.Group.core, collection, sh,
-                Utils.parseMetricsReplicaName(collection, core));
-            cloudManager.getMetricManager().registry(registry).counter("UPDATE./update.requests").inc(count.get());
-          });
-        });
+        int sliceNum = (initialSlice + i) % slices.length;
+        Slice s = slices[sliceNum];
+        if (s.getState() != Slice.State.ACTIVE) {
+          log.debug("-- slice not active: {}", s);
+        }
+        Replica leader = s.getLeader();
+        if (leader == null) {
+          log.debug("-- no leader in " + s);
+          continue;
+        }
+        metricUpdates.computeIfAbsent(s.getName(), sh -> new HashMap<>())
+            .computeIfAbsent(leader.getCoreName(), cn -> new AtomicLong())
+            .addAndGet(addDocs);
+        AtomicLong bufferedUpdates = (AtomicLong)s.getProperties().get(BUFFERED_UPDATES);
+        if (bufferedUpdates != null) {
+          bufferedUpdates.addAndGet(addDocs);
+          continue;
+        }
+        docUpdates.computeIfAbsent(s.getName(), sh -> new AtomicLong())
+            .addAndGet(addDocs);
       }
-      if (modified) {
-        collectionsStatesRef.set(null);
+      if (docCount > 0) {
+        lock.lockInterruptibly();
+        try {
+          docUpdates.forEach((sh, count) -> {
+            try {
+              simSetShardValue(collection, sh, "SEARCHER.searcher.numDocs", count.get(), true, false);
+              simSetShardValue(collection, sh, "SEARCHER.searcher.maxDoc", count.get(), true, false);
+              // for each new document increase the size by DEFAULT_DOC_SIZE_BYTES
+              simSetShardValue(collection, sh, Type.CORE_IDX.metricsAttribute,
+                  DEFAULT_DOC_SIZE_BYTES * count.get(), true, false);
+              simSetShardValue(collection, sh, Variable.coreidxsize,
+                  Type.CORE_IDX.convertVal(DEFAULT_DOC_SIZE_BYTES * count.get()), true, false);
+            } catch (Exception e) {
+              throw new RuntimeException(e);
+            }
+          });
+          metricUpdates.forEach((sh, cores) -> {
+            cores.forEach((core, count) -> {
+              String registry = SolrMetricManager.getRegistryName(SolrInfoBean.Group.core, collection, sh,
+                  Utils.parseMetricsReplicaName(collection, core));
+              cloudManager.getMetricManager().registry(registry).counter("UPDATE./update.requests").inc(count.get());
+            });
+          });
+        } finally {
+          lock.unlock();
+        }
       }
-    } finally {
-      lock.unlock();
     }
     return new UpdateResponse();
   }
@@ -1643,8 +1682,15 @@ public class SimClusterStateProvider implements ClusterStateProvider {
    *               divided by the number of replicas.
    */
   public void simSetShardValue(String collection, String shard, String key, Object value, boolean delta, boolean divide) throws Exception {
-    List<ReplicaInfo> infos = colShardReplicaMap.computeIfAbsent(collection, c -> new ConcurrentHashMap<>())
-        .computeIfAbsent(shard, s -> new ArrayList<>());
+    final List<ReplicaInfo> infos;
+    if (shard == null) {
+      infos = new ArrayList<>();
+      colShardReplicaMap.computeIfAbsent(collection, c -> new ConcurrentHashMap<>())
+        .forEach((sh, replicas) -> infos.addAll(replicas));
+    } else {
+      infos = colShardReplicaMap.computeIfAbsent(collection, c -> new ConcurrentHashMap<>())
+          .computeIfAbsent(shard, s -> new ArrayList<>());
+    }
     if (infos.isEmpty()) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection " + collection + " doesn't exist (shard=" + shard + ").");
     }
@@ -1729,24 +1775,23 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     }
   }
 
+  public List<ReplicaInfo> simGetReplicaInfos(String collection, String shard) {
+    List<ReplicaInfo> replicas = colShardReplicaMap.computeIfAbsent(collection, c -> new ConcurrentHashMap<>())
+        .computeIfAbsent(shard, s -> new ArrayList<>());
+    if (replicas == null) {
+      return Collections.emptyList();
+    } else {
+      // make a defensive copy to avoid ConcurrentModificationException
+      return Arrays.asList(replicas.toArray(new ReplicaInfo[replicas.size()]));
+    }
+  }
+
   /**
    * List collections.
    * @return list of existing collections.
    */
   public List<String> simListCollections() throws InterruptedException {
-    final Set<String> collections = new HashSet<>();
-    lock.lockInterruptibly();
-    try {
-      nodeReplicaMap.forEach((n, replicas) -> {
-        replicas.forEach(ri -> collections.add(ri.getCollection()));
-      });
-      // check collProps and sliceProps too
-      collProperties.forEach((coll, props) -> collections.add(coll));
-      sliceProperties.forEach((coll, slices) -> collections.add(coll));
-      return new ArrayList<>(collections);
-    } finally {
-      lock.unlock();
-    }
+    return new ArrayList<>(colShardReplicaMap.keySet());
   }
 
   // interface methods
@@ -1790,6 +1835,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
     lock.lock();
     collectionsStatesRef.set(null);
     saveClusterState.set(true);
+    log.debug("** creating new collection states");
     try {
       Map<String, Map<String, Map<String, Replica>>> collMap = new HashMap<>();
       nodeReplicaMap.forEach((n, replicas) -> {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
index 9673fa7..5f9293b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimNodeStateProvider.java
@@ -233,6 +233,7 @@ public class SimNodeStateProvider implements NodeStateProvider {
   }
 
   private static final Pattern REGISTRY_PATTERN = Pattern.compile("^solr\\.core\\.([\\w.-_]+?)\\.(shard[\\d_]+?)\\.(replica.*)");
+  private static final Pattern METRIC_KEY_PATTERN = Pattern.compile("^metrics:([^:]+?):([^:]+?)(:([^:]+))?$");
   /**
    * Simulate getting replica metrics values. This uses per-replica properties set in
    * {@link SimClusterStateProvider#simSetCollectionValue(String, String, Object, boolean, boolean)} and
@@ -245,33 +246,31 @@ public class SimNodeStateProvider implements NodeStateProvider {
     if (!liveNodesSet.contains(node)) {
       throw new RuntimeException("non-live node " + node);
     }
-    List<ReplicaInfo> replicas = clusterStateProvider.simGetReplicaInfos(node);
-    if (replicas == null || replicas.isEmpty()) {
-      return Collections.emptyMap();
-    }
     Map<String, Object> values = new HashMap<>();
     for (String tag : tags) {
-      String[] parts = tag.split(":");
-      if (parts.length < 3 || !parts[0].equals("metrics")) {
+      Matcher m = METRIC_KEY_PATTERN.matcher(tag);
+      if (!m.matches() || m.groupCount() < 2) {
         log.warn("Invalid metrics: tag: " + tag);
         continue;
       }
-      if (!parts[1].startsWith("solr.core.")) {
+      String registryName = m.group(1);
+      String key = m.group(3) != null ? m.group(2) + m.group(3) : m.group(2);
+      if (!registryName.startsWith("solr.core.")) {
         // skip - this is probably solr.node or solr.jvm metric
         continue;
       }
-      Matcher m = REGISTRY_PATTERN.matcher(parts[1]);
+      m = REGISTRY_PATTERN.matcher(registryName);
 
       if (!m.matches()) {
-        log.warn("Invalid registry name: " + parts[1]);
+        log.warn("Invalid registry name: " + registryName);
         continue;
       }
       String collection = m.group(1);
       String shard = m.group(2);
       String replica = m.group(3);
-      String key = parts.length > 3 ? parts[2] + ":" + parts[3] : parts[2];
+      List<ReplicaInfo> replicas = clusterStateProvider.simGetReplicaInfos(collection, shard);
       replicas.forEach(r -> {
-        if (r.getCollection().equals(collection) && r.getShard().equals(shard) && r.getCore().endsWith(replica)) {
+        if (r.getNode().equals(node) && r.getCore().endsWith(replica)) {
           Object value = r.getVariables().get(key);
           if (value != null) {
             values.put(tag, value);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
index dbb9785..c564fec 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimAutoScaling.java
@@ -3,6 +3,7 @@ package org.apache.solr.cloud.autoscaling.sim;
 import java.lang.invoke.MethodHandles;
 import java.util.Iterator;
 
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -24,7 +25,9 @@ import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAut
 /**
  *
  */
-@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG")
+@TimeoutSuite(millis = 48 * 3600 * 1000)
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=INFO;org.apache.solr.cloud.autoscaling.ScheduledTriggers=INFO")
+//@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.CloudTestUtils=TRACE")
 public class TestSimAutoScaling extends SimSolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -35,8 +38,7 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
 
   @BeforeClass
   public static void setupCluster() throws Exception {
-    // 20 mln docs / node
-    configureCluster(50, TimeSource.get("simTime:" + SPEED));
+    configureCluster(500, TimeSource.get("simTime:" + SPEED));
     timeSource = cluster.getTimeSource();
     solrClient = cluster.simGetSolrClient();
   }
@@ -56,7 +58,7 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
         "'name' : 'scaleUpTrigger'," +
         "'event' : 'indexSize'," +
         "'waitFor' : '" + waitForSeconds + "s'," +
-        "'aboveDocs' : 5000000," +
+        "'aboveDocs' : 10000000," +
         "'enabled' : true," +
         "'actions' : [{'name' : 'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
         "{'name' : 'execute_plan', 'class' : '" + ExecutePlanAction.class.getName() + "'}]" +
@@ -65,9 +67,9 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
     NamedList<Object> response = solrClient.request(req);
     assertEquals(response.get("result").toString(), "success");
 
-    long batchSize = 250000;
-    for (long i = 0; i < 10000; i++) {
-      log.info("#### Total docs so far: " + (i * batchSize));
+    long batchSize = 4000000;
+    for (long i = 0; i < 100000; i++) {
+      log.info(String.format("#### Total docs so far: %,d", (i * batchSize)));
       addDocs(collectionName, i * batchSize, batchSize);
       timeSource.sleep(waitForSeconds);
     }
@@ -81,12 +83,13 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
   }
 
   // lightweight generator of fake documents
+  // NOTE: this iterator only ever returns the same document, which works ok
+  // for our "index update" simulation. Obviously don't use this for real indexing.
   private static class FakeDocIterator implements Iterator<SolrInputDocument> {
     final SolrInputDocument doc = new SolrInputDocument();
     final SolrInputField idField = new SolrInputField("id");
 
     final long start, count;
-    final StringBuilder sb = new StringBuilder("id-");
 
     long current, max;
 
@@ -95,6 +98,7 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
       this.count = count;
       current = start;
       max = start + count;
+      idField.setValue("foo");
       doc.put("id", idField);
     }
 
@@ -105,8 +109,6 @@ public class TestSimAutoScaling extends SimSolrCloudTestCase {
 
     @Override
     public SolrInputDocument next() {
-      sb.setLength(3);
-      idField.setValue(sb.append(Long.toString(current)).toString());
       current++;
       return doc;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
index d0d08fd..9a1d63e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
@@ -92,7 +92,7 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
     log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
         CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
 
-    String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
     ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
     DocCollection docCollection = clusterState.getCollection(collectionName);
     List<Replica> replicas = docCollection.getReplicas(sourceNodeName);
@@ -181,7 +181,7 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
     CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
         collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
 
-    String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
     ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
     DocCollection docCollection = clusterState.getCollection(collectionName);
     List<Replica> replicas = docCollection.getReplicas(sourceNodeName);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimNodeLostTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimNodeLostTrigger.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimNodeLostTrigger.java
index 4ad0623..c1c5f4c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimNodeLostTrigger.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimNodeLostTrigger.java
@@ -131,7 +131,7 @@ public class TestSimNodeLostTrigger extends SimSolrCloudTestCase {
       trigger.setProcessor(noFirstRunProcessor);
       trigger.run();
 
-      String lostNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+      String lostNode = cluster.getSimClusterStateProvider().simGetRandomNode();
       cluster.simRemoveNode(lostNode, false);
       AtomicBoolean fired = new AtomicBoolean(false);
       trigger.setProcessor(event -> {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
index c964e44..7e9da4e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
@@ -109,7 +109,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
 
   public void testCreateCollectionAddReplica() throws Exception  {
     SolrClient solrClient = cluster.simGetSolrClient();
-    String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();
 
     int port = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.PORT);
 
@@ -134,13 +134,13 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
 
   public void testCreateCollectionSplitShard() throws Exception  {
     SolrClient solrClient = cluster.simGetSolrClient();
-    String firstNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String firstNode = cluster.getSimClusterStateProvider().simGetRandomNode();
     int firstNodePort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(firstNode, ImplicitSnitch.PORT);
 
     String secondNode;
     int secondNodePort;
     while (true)  {
-      secondNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+      secondNode = cluster.getSimClusterStateProvider().simGetRandomNode();
       secondNodePort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(secondNode, ImplicitSnitch.PORT);
       if (secondNodePort != firstNodePort)  break;
     }
@@ -292,7 +292,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
 
   public void testCreateCollectionAddShardUsingPolicy() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
-    String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();
     int port = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.PORT);
 
     String commands =  "{set-policy :{c1 : [{replica:1 , shard:'#EACH', port: '" + port + "'}]}}";
@@ -343,7 +343,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
       assertTrue("sysLoadAvg value is " + ((Number) val.get("sysLoadAvg")).doubleValue(), Double.compare(((Number) val.get("sysLoadAvg")).doubleValue(), 0.0d) > 0);
     }
     // simulator doesn't have Overseer, so just pick a random node
-    String overseerNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String overseerNode = cluster.getSimClusterStateProvider().simGetRandomNode();
     solrClient.request(CollectionAdminRequest.addRole(overseerNode, "overseer"));
     for (int i = 0; i < 10; i++) {
       Map<String, Object> data = Utils.getJson(cluster.getDistribStateManager(), ZkStateReader.ROLES);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
index 81952af..53f26b9 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
@@ -479,7 +479,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
       fail("The TriggerAction should have been created by now");
     }
 
-    String lostNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String lostNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
     cluster.simRemoveNode(lostNodeName, false);
     boolean await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
@@ -647,7 +647,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
         "'actions' : [{'name':'test','class':'" + TestEventQueueAction.class.getName() + "'}]" +
         "}}";
 
-    String overseerLeader = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String overseerLeader = cluster.getSimClusterStateProvider().simGetRandomNode();
 
     SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
     NamedList<Object> response = solrClient.request(req);
@@ -805,7 +805,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     SolrClient solrClient = cluster.simGetSolrClient();
 
     // pick overseer node
-    String overseerLeader = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    String overseerLeader = cluster.getSimClusterStateProvider().simGetRandomNode();
 
     // add a node
     String node = cluster.simAddNode();
@@ -864,7 +864,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     response = solrClient.request(req);
     assertEquals(response.get("result").toString(), "success");
 
-    overseerLeader = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    overseerLeader = cluster.getSimClusterStateProvider().simGetRandomNode();
 
     // create another node
     log.info("====== ADD NODE 1");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a1ee8e1/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
index 97f2521..ca83ad4 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
@@ -49,10 +49,12 @@ public class ReplicaInfo implements MapWriter {
     this.collection = coll;
     this.shard = shard;
     this.type = r.getType();
-    this.isLeader = r.getBool(LEADER_PROP, false);
+    boolean maybeLeader = r.getBool(LEADER_PROP, false);
     if (vals != null) {
       this.variables.putAll(vals);
+      maybeLeader = "true".equals(String.valueOf(vals.getOrDefault(LEADER_PROP, maybeLeader)));
     }
+    this.isLeader = maybeLeader;
     this.node = r.getNodeName();
   }