Posted to commits@lucene.apache.org by ab...@apache.org on 2018/08/27 13:15:26 UTC

[4/6] lucene-solr:branch_7x: SOLR-12669: Rename tests that use the autoscaling simulation framework.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
new file mode 100644
index 0000000..40ca91b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
+import org.apache.solr.client.solrj.cloud.autoscaling.Preference;
+import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.cloud.rule.ImplicitSnitch;
+import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.common.util.Utils;
+import org.apache.zookeeper.Watcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This test compares the cluster state of a real cluster and a simulated one.
+ */
+public class TestSimClusterStateProvider extends SolrCloudTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static int NODE_COUNT = 3;
+  private static boolean simulated;
+
+  private static SolrCloudManager cloudManager;
+
+  private static Collection<String> liveNodes;
+  private static Map<String, Object> clusterProperties;
+  private static AutoScalingConfig autoScalingConfig;
+  private static Map<String, Map<String, Map<String, List<ReplicaInfo>>>> replicas;
+  private static Map<String, Map<String, Object>> nodeValues;
+  private static ClusterState realState;
+
+  // set up a real cluster as the source of test data
+  @BeforeClass
+  public static void setupCluster() throws Exception {
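+    // randomly exercise either the real implementation or the simulated one, so both paths get coverage across runs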
+    simulated = random().nextBoolean();
+    log.info("####### Using simulated components? " + simulated);
+
+    configureCluster(NODE_COUNT)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+    CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL, null, 1, 2, 0, 1)
+        .process(cluster.getSolrClient());
+    init();
+  }
+
+  @AfterClass
+  public static void closeCloudManager() throws Exception {
+    if (simulated && cloudManager != null) {
+      cloudManager.close();
+    }
+  }
+
+  private static void init() throws Exception {
+    SolrCloudManager realManager = cluster.getJettySolrRunner(cluster.getJettySolrRunners().size() - 1).getCoreContainer()
+        .getZkController().getSolrCloudManager();
+    liveNodes = realManager.getClusterStateProvider().getLiveNodes();
+    clusterProperties = realManager.getClusterStateProvider().getClusterProperties();
+    autoScalingConfig = realManager.getDistribStateManager().getAutoScalingConfig();
+    replicas = new HashMap<>();
+    nodeValues = new HashMap<>();
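+    // snapshot per-node state from the real cluster; in simulated mode these values seed the simulator below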
+    liveNodes.forEach(n -> {
+      replicas.put(n, realManager.getNodeStateProvider().getReplicaInfo(n, Collections.emptySet()));
+      nodeValues.put(n, realManager.getNodeStateProvider().getNodeValues(n, ImplicitSnitch.tags));
+    });
+    realState = realManager.getClusterStateProvider().getClusterState();
+
+    if (simulated) {
+      // initialize simulated provider
+      SimCloudManager simCloudManager = new SimCloudManager(TimeSource.get("simTime:10"));
+      simCloudManager.getSimClusterStateProvider().simSetClusterProperties(clusterProperties);
+      simCloudManager.getSimDistribStateManager().simSetAutoScalingConfig(autoScalingConfig);
+      nodeValues.forEach((n, values) -> {
+        try {
+          simCloudManager.getSimNodeStateProvider().simSetNodeValues(n, values);
+        } catch (InterruptedException e) {
+          fail("Interrupted:" + e);
+        }
+      });
+      simCloudManager.getSimClusterStateProvider().simSetClusterState(realState);
+      ClusterState simState = simCloudManager.getClusterStateProvider().getClusterState();
+      assertClusterStateEquals(realState, simState);
+      cloudManager = simCloudManager;
+    } else {
+      cloudManager = realManager;
+    }
+  }
+
+  private static void assertClusterStateEquals(ClusterState one, ClusterState two) {
+    assertEquals(one.getLiveNodes(), two.getLiveNodes());
+    assertEquals(one.getCollectionsMap().keySet(), two.getCollectionsMap().keySet());
+    one.forEachCollection(oneColl -> {
+      DocCollection twoColl = two.getCollection(oneColl.getName());
+      Map<String, Slice> oneSlices = oneColl.getSlicesMap();
+      Map<String, Slice> twoSlices = twoColl.getSlicesMap();
+      assertEquals(oneSlices.keySet(), twoSlices.keySet());
+      oneSlices.forEach((s, slice) -> {
+        Slice sTwo = twoSlices.get(s);
+        for (Replica oneReplica : slice.getReplicas()) {
+          Replica twoReplica = sTwo.getReplica(oneReplica.getName());
+          assertNotNull(twoReplica);
+          assertEquals(oneReplica, twoReplica);
+        }
+      });
+    });
+  }
+
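+  // in simulated mode the provider doesn't track the real cluster, so topology and config changes are mirrored into it by hand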
+  private String addNode() throws Exception {
+    JettySolrRunner solr = cluster.startJettySolrRunner();
+    String nodeId = solr.getNodeName();
+    if (simulated) {
+      ((SimCloudManager) cloudManager).getSimClusterStateProvider().simAddNode(nodeId);
+    }
+    return nodeId;
+  }
+
+  private String deleteNode() throws Exception {
+    String nodeId = cluster.getJettySolrRunner(0).getNodeName();
+    cluster.stopJettySolrRunner(0);
+    if (simulated) {
+      ((SimCloudManager) cloudManager).getSimClusterStateProvider().simRemoveNode(nodeId);
+    }
+    return nodeId;
+  }
+
+  private void setAutoScalingConfig(AutoScalingConfig cfg) throws Exception {
+    cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getZkClient().setData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH,
+        Utils.toJSON(cfg), -1, true);
+    if (simulated) {
+      ((SimCloudManager) cloudManager).getSimDistribStateManager().simSetAutoScalingConfig(cfg);
+    }
+  }
+
+  @Test
+  public void testAddRemoveNode() throws Exception {
+    Set<String> lastNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
+    List<String> liveNodes = cloudManager.getDistribStateManager().listData(ZkStateReader.LIVE_NODES_ZKNODE);
+    assertEquals(lastNodes.size(), liveNodes.size());
+    liveNodes.removeAll(lastNodes);
+    assertTrue(liveNodes.isEmpty());
+
+    String node = addNode();
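+    // let the change propagate (with the simulated time source this sleep passes faster than wall-clock time)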
+    cloudManager.getTimeSource().sleep(2000);
+    assertFalse(lastNodes.contains(node));
+    lastNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
+    assertTrue(lastNodes.contains(node));
+    liveNodes = cloudManager.getDistribStateManager().listData(ZkStateReader.LIVE_NODES_ZKNODE);
+    assertEquals(lastNodes.size(), liveNodes.size());
+    liveNodes.removeAll(lastNodes);
+    assertTrue(liveNodes.isEmpty());
+
+    node = deleteNode();
+    cloudManager.getTimeSource().sleep(2000);
+    assertTrue(lastNodes.contains(node));
+    lastNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
+    assertFalse(lastNodes.contains(node));
+    liveNodes = cloudManager.getDistribStateManager().listData(ZkStateReader.LIVE_NODES_ZKNODE);
+    assertEquals(lastNodes.size(), liveNodes.size());
+    liveNodes.removeAll(lastNodes);
+    assertTrue(liveNodes.isEmpty());
+  }
+
+  @Test
+  public void testAutoScalingConfig() throws Exception {
+    final CountDownLatch triggered = new CountDownLatch(1);
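+    // ZK-style watches are one-shot: this watcher must fire exactly once for the single config update below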
+    Watcher w = ev -> {
+      if (triggered.getCount() == 0) {
+        fail("already triggered once!");
+      }
+      triggered.countDown();
+    };
+    AutoScalingConfig cfg = cloudManager.getDistribStateManager().getAutoScalingConfig(w);
+    assertEquals(autoScalingConfig, cfg);
+    Preference p = new Preference(Collections.singletonMap("maximize", "freedisk"));
+    cfg = cfg.withPolicy(cfg.getPolicy().withClusterPreferences(Collections.singletonList(p)));
+    setAutoScalingConfig(cfg);
+    if (!triggered.await(10, TimeUnit.SECONDS)) {
+      fail("Watch should be triggered on update!");
+    }
+    AutoScalingConfig cfg1 = cloudManager.getDistribStateManager().getAutoScalingConfig(null);
+    assertEquals(cfg, cfg1);
+
+    // restore
+    setAutoScalingConfig(autoScalingConfig);
+    cfg1 = cloudManager.getDistribStateManager().getAutoScalingConfig(null);
+    assertEquals(autoScalingConfig, cfg1);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
new file mode 100644
index 0000000..719bb7b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.autoscaling.ActionContext;
+import org.apache.solr.cloud.autoscaling.ComputePlanAction;
+import org.apache.solr.cloud.autoscaling.ScheduledTriggers;
+import org.apache.solr.cloud.autoscaling.TriggerAction;
+import org.apache.solr.cloud.autoscaling.TriggerEvent;
+import org.apache.solr.cloud.autoscaling.TriggerValidationException;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.LogLevel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOVEREPLICA;
+
+/**
+ * Test for {@link ComputePlanAction}
+ */
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;org.apache.solr.client.solrj.cloud.autoscaling=DEBUG;")
+public class TestSimComputePlanAction extends SimSolrCloudTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static final AtomicBoolean fired = new AtomicBoolean(false);
+  private static final int NODE_COUNT = 1;
+  private static CountDownLatch triggerFiredLatch = new CountDownLatch(1);
+  private static final AtomicReference<Map> actionContextPropsRef = new AtomicReference<>();
+  private static final AtomicReference<TriggerEvent> eventRef = new AtomicReference<>();
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1, TimeSource.get("simTime:50"));
+  }
+
+  @Before
+  public void init() throws Exception {
+
+    fired.set(false);
+    triggerFiredLatch = new CountDownLatch(1);
+    actionContextPropsRef.set(null);
+
+    String setClusterPolicyCommand = "{" +
+        " 'set-cluster-policy': [" +
+        "      {'cores':'<10', 'node':'#ANY'}," +
+        "      {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}," +
+        "      {'nodeRole':'overseer', 'replica':0}" +
+        "    ]" +
+        "}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+    SolrResponse rsp = cluster.request(req);
+    NamedList<Object> response = rsp.getResponse();
+    assertEquals(response.get("result").toString(), "success");
+
+    String setClusterPreferencesCommand = "{" +
+        "'set-cluster-preferences': [" +
+        "{'minimize': 'cores'}," +
+        "{'maximize': 'freedisk','precision': 100}]" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPreferencesCommand);
+    rsp = cluster.request(req);
+    response = rsp.getResponse();
+    assertEquals(response.get("result").toString(), "success");
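+    // wait out the trigger cooldown period so the triggers set up by individual tests can fire promptly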
+    cluster.getTimeSource().sleep(TimeUnit.SECONDS.toMillis(ScheduledTriggers.DEFAULT_COOLDOWN_PERIOD_SECONDS));
+  }
+
+  @After
+  public void printState() throws Exception {
+    log.info("-------------_ FINAL STATE --------------");
+    log.info("* Node values: " + Utils.toJSONString(cluster.getSimNodeStateProvider().simGetAllNodeValues()));
+    log.info("* Live nodes: " + cluster.getClusterStateProvider().getLiveNodes());
+    ClusterState state = cluster.getClusterStateProvider().getClusterState();
+    for (String coll: cluster.getSimClusterStateProvider().simListCollections()) {
+      log.info("* Collection " + coll + " state: " + state.getCollection(coll));
+    }
+
+  }
+
+  @Test
+  public void testNodeLost() throws Exception  {
+    // let's start a node so that we have at least two
+    String node = cluster.simAddNode();
+    AssertingTriggerAction.expectedNode = node;
+
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_lost_trigger'," +
+        "'event' : 'nodeLost'," +
+        "'waitFor' : '7s'," +
+        "'enabled' : true," +
+        "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+        "{'name':'test','class':'" + TestSimComputePlanAction.AssertingTriggerAction.class.getName() + "'}]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeLost",
+        "conf",1, 2);
+    create.process(solrClient);
+
+    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        "testNodeLost", CloudTestUtils.clusterShape(1, 2, false, true));
+
+    ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+    log.debug("-- cluster state: {}", clusterState);
+    DocCollection collection = clusterState.getCollection("testNodeLost");
+    List<Replica> replicas = collection.getReplicas(node);
+    assertNotNull(replicas);
+    assertFalse(replicas.isEmpty());
+
+    // start another node, because when the other node goes away the cluster policy requires only
+    // 1 replica per node and none on the overseer
+    String node2 = cluster.simAddNode();
+    assertTrue(node2 + "is not live yet", cluster.getClusterStateProvider().getClusterState().liveNodesContain(node2) );
+
+    // stop the original node
+    cluster.simRemoveNode(node, false);
+    log.info("Stopped_node : {}", node);
+
+    assertTrue("Trigger was not fired even after 10 seconds", triggerFiredLatch.await(10, TimeUnit.SECONDS));
+    assertTrue(fired.get());
+    Map context = actionContextPropsRef.get();
+    assertNotNull(context);
+    List<SolrRequest> operations = (List<SolrRequest>) context.get("operations");
+    assertNotNull("The operations computed by ComputePlanAction should not be null , " + eventRef.get(), operations);
+    assertEquals("ComputePlanAction should have computed exactly 1 operation", 1, operations.size());
+    SolrRequest solrRequest = operations.get(0);
+    SolrParams params = solrRequest.getParams();
+    assertEquals("Expected MOVEREPLICA action after adding node", MOVEREPLICA, CollectionParams.CollectionAction.get(params.get("action")));
+    String replicaToBeMoved = params.get("replica");
+    assertEquals("Unexpected node in computed operation", replicas.get(0).getName(), replicaToBeMoved);
+
+    // shutdown the extra node that we had started
+    cluster.simRemoveNode(node2, false);
+  }
+
+  @Test
+  public void testNodeWithMultipleReplicasLost() throws Exception {
+    AssertingTriggerAction.expectedNode = null;
+
+    // start 3 more nodes
+    cluster.simAddNode();
+    cluster.simAddNode();
+    cluster.simAddNode();
+
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_lost_trigger'," +
+        "'event' : 'nodeLost'," +
+        "'waitFor' : '1s'," +
+        "'enabled' : true," +
+        "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+        "{'name':'test','class':'" + AssertingTriggerAction.class.getName() + "'}]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeWithMultipleReplicasLost",
+        "conf",2, 3);
+//    create.setMaxShardsPerNode(2);
+    create.process(solrClient);
+
+    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        "testNodeWithMultipleReplicasLost", CloudTestUtils.clusterShape(2, 3, false, true));
+
+    ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+    log.debug("-- cluster state: {}", clusterState);
+    DocCollection docCollection = clusterState.getCollection("testNodeWithMultipleReplicasLost");
+
+    // let's find a node with at least 2 replicas
+    String stoppedNodeName = null;
+    List<Replica> replicasToBeMoved = null;
+    for (String node : cluster.getClusterStateProvider().getLiveNodes()) {
+      List<Replica> replicas = docCollection.getReplicas(node);
+      if (replicas != null && replicas.size() == 2) {
+        stoppedNodeName = node;
+        replicasToBeMoved = replicas;
+        cluster.simRemoveNode(node, false);
+        break;
+      }
+    }
+    assertNotNull(stoppedNodeName);
+
+    assertTrue("Trigger was not fired even after 5 seconds", triggerFiredLatch.await(5, TimeUnit.SECONDS));
+    assertTrue(fired.get());
+
+    TriggerEvent triggerEvent = eventRef.get();
+    assertNotNull(triggerEvent);
+    assertEquals(TriggerEventType.NODELOST, triggerEvent.getEventType());
+    // TODO assertEquals(stoppedNodeName, triggerEvent.getProperty(TriggerEvent.NODE_NAME));
+
+    Map context = actionContextPropsRef.get();
+    assertNotNull(context);
+    List<SolrRequest> operations = (List<SolrRequest>) context.get("operations");
+    assertNotNull("The operations computed by ComputePlanAction should not be null " + actionContextPropsRef.get() + "\nevent: " + eventRef.get(), operations);
+    operations.forEach(solrRequest -> log.info(solrRequest.getParams().toString()));
+    assertEquals("ComputePlanAction should have computed exactly 2 operation", 2, operations.size());
+
+    for (SolrRequest solrRequest : operations) {
+      SolrParams params = solrRequest.getParams();
+      assertEquals("Expected MOVEREPLICA action after adding node", MOVEREPLICA, CollectionParams.CollectionAction.get(params.get("action")));
+      String moved = params.get("replica");
+      assertTrue(replicasToBeMoved.stream().anyMatch(replica -> replica.getName().equals(moved)));
+    }
+  }
+
+  @Test
+  //17-Aug-2018 commented @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
+  public void testNodeAdded() throws Exception {
+    AssertingTriggerAction.expectedNode = null;
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_added_trigger'," +
+        "'event' : 'nodeAdded'," +
+        "'waitFor' : '1s'," +
+        "'enabled' : true," +
+        "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+        "{'name':'test','class':'" + TestSimComputePlanAction.AssertingTriggerAction.class.getName() + "'}]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    // the default policy allows only 1 replica per shard per node; we need more right now
+    String setClusterPolicyCommand = "{" +
+        " 'set-cluster-policy': [" +
+        "      {'cores':'<10', 'node':'#ANY'}," +
+        "      {'replica':'<5', 'shard': '#EACH', 'node': '#ANY'}," +
+        "      {'nodeRole':'overseer', 'replica':0}" +
+        "    ]" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeAdded",
+        "conf",1, 4);
+    create.process(solrClient);
+
+    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        "testNodeAdded", (liveNodes, collectionState) -> collectionState.getReplicas().stream().allMatch(replica -> replica.isActive(liveNodes)));
+
+    // lower the limit again (fewer than 3 replicas per shard per node) so the current layout violates the policy
+    setClusterPolicyCommand = "{" +
+        " 'set-cluster-policy': [" +
+        "      {'cores':'<10', 'node':'#ANY'}," +
+        "      {'replica':'<3', 'shard': '#EACH', 'node': '#ANY'}," +
+        "      {'nodeRole':'overseer', 'replica':0}" +
+        "    ]" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    // start a node so that the 'violation' created by the previous policy update is fixed
+    String newNode = cluster.simAddNode();
+    assertTrue("Trigger was not fired even after 5 seconds", triggerFiredLatch.await(5, TimeUnit.SECONDS));
+    assertTrue(fired.get());
+    Map context = actionContextPropsRef.get();
+    assertNotNull(context);
+    log.info("Node values: " + Utils.toJSONString(cluster.getSimNodeStateProvider().simGetAllNodeValues()));
+    log.info("Live nodes: " + cluster.getClusterStateProvider().getLiveNodes() + ", collection state: " + cluster.getClusterStateProvider().getClusterState().getCollection("testNodeAdded"));
+    List<SolrRequest> operations = (List<SolrRequest>) context.get("operations");
+    assertNotNull("The operations computed by ComputePlanAction should not be null" + context, operations);
+    assertEquals("ComputePlanAction should have computed exactly 1 operation, but was: " + operations, 1, operations.size());
+    SolrRequest request = operations.get(0);
+    SolrParams params = request.getParams();
+    assertEquals("Expected MOVEREPLICA action after adding node", MOVEREPLICA, CollectionParams.CollectionAction.get(params.get("action")));
+    String nodeAdded = params.get("targetNode");
+    assertEquals("Unexpected node in computed operation", newNode, nodeAdded);
+  }
+
+  public static class AssertingTriggerAction implements TriggerAction {
+    static String expectedNode;
+
+    @Override
+    public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, Map<String, Object> properties) throws TriggerValidationException {
+
+    }
+
+    @Override
+    public void init() {
+
+    }
+
+    @Override
+    public String getName() {
+      return null;
+    }
+
+    @Override
+    public void process(TriggerEvent event, ActionContext context) {
+      if (expectedNode != null) {
+        Collection nodes = (Collection) event.getProperty(TriggerEvent.NODE_NAMES);
+        if (nodes == null || !nodes.contains(expectedNode)) return;//this is not the event we are looking for
+      }
+      if (fired.compareAndSet(false, true)) {
+        eventRef.set(event);
+        actionContextPropsRef.set(context.getProperties());
+        triggerFiredLatch.countDown();
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java
new file mode 100644
index 0000000..b161a15
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.NoSuchElementException;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
+import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
+import org.apache.solr.client.solrj.cloud.DistribStateManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.NotEmptyException;
+import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
+import org.apache.solr.client.solrj.impl.ZkDistribStateManager;
+import org.apache.solr.cloud.ZkTestServer;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This test compares a ZK-based {@link DistribStateManager} to the simulated one.
+ */
+public class TestSimDistribStateManager extends SolrTestCaseJ4 {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private DistribStateManager stateManager;
+  private ZkTestServer zkTestServer;
+  private SolrZkClient solrZkClient;
+  private boolean simulated;
+  private SimDistribStateManager.Node root;
+
+  @Before
+  public void setup() throws Exception {
+    simulated = random().nextBoolean();
+    if (simulated) {
+      root = SimDistribStateManager.createNewRootNode();
+    } else {
+      zkTestServer = new ZkTestServer(createTempDir("zkDir").toString());
+      zkTestServer.run();
+    }
+    reInit();
+  }
+
+  private void reInit() throws Exception {
+    if (stateManager != null) {
+      stateManager.close();
+    }
+    if (simulated) {
+      stateManager = new SimDistribStateManager(root);
+    } else {
+      if (solrZkClient != null) {
+        solrZkClient.close();
+      }
+      solrZkClient = new SolrZkClient(zkTestServer.getZkHost(), 30000);
+      stateManager = new ZkDistribStateManager(solrZkClient);
+    }
+    log.info("Using " + stateManager.getClass().getName());
+  }
+
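+  // a second, independent manager (a separate ZK connection in the real case) is used to create ephemeral nodes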
+  private DistribStateManager createDistribStateManager() {
+    if (simulated) {
+      return new SimDistribStateManager(root);
+    } else {
+      SolrZkClient cli = new SolrZkClient(zkTestServer.getZkHost(), 30000);
+      return new ZkDistribStateManager(cli);
+    }
+  }
+
+  private void destroyDistribStateManager(DistribStateManager mgr) throws Exception {
+    mgr.close();
+    if (mgr instanceof ZkDistribStateManager) {
+      ((ZkDistribStateManager)mgr).getZkClient().close();
+    }
+  }
+
+  @After
+  public void teardown() throws Exception {
+    if (solrZkClient != null) {
+      solrZkClient.close();
+      solrZkClient = null;
+    }
+    if (zkTestServer != null) {
+      zkTestServer.shutdown();
+      zkTestServer = null;
+    }
+    if (stateManager != null) {
+      stateManager.close();
+    }
+    stateManager = null;
+  }
+
+  @Test
+  public void testHasData() throws Exception {
+    assertFalse(stateManager.hasData("/hasData/foo"));
+    assertFalse(stateManager.hasData("/hasData/bar"));
+    try {
+      stateManager.createData("/hasData/foo", new byte[0], CreateMode.PERSISTENT);
+      fail("should have failed (parent /hasData doesn't exist)");
+    } catch (NoSuchElementException e) {
+      // expected
+    }
+    stateManager.makePath("/hasData");
+    stateManager.createData("/hasData/foo", new byte[0], CreateMode.PERSISTENT);
+    stateManager.createData("/hasData/bar", new byte[0], CreateMode.PERSISTENT);
+    assertTrue(stateManager.hasData("/hasData/foo"));
+    assertTrue(stateManager.hasData("/hasData/bar"));
+  }
+
+  @Test
+  public void testRemoveData() throws Exception {
+    assertFalse(stateManager.hasData("/removeData/foo"));
+    assertFalse(stateManager.hasData("/removeData/foo/bar"));
+    assertFalse(stateManager.hasData("/removeData/baz"));
+    assertFalse(stateManager.hasData("/removeData/baz/1/2/3"));
+    stateManager.makePath("/removeData/foo/bar");
+    stateManager.makePath("/removeData/baz/1/2/3");
+    assertTrue(stateManager.hasData("/removeData/foo"));
+    assertTrue(stateManager.hasData("/removeData/foo/bar"));
+    assertTrue(stateManager.hasData("/removeData/baz/1/2/3"));
+    try {
+      stateManager.removeData("/removeData/foo", -1);
+      fail("should have failed (node has children)");
+    } catch (NotEmptyException e) {
+      // expected
+    }
+    stateManager.removeData("/removeData/foo/bar", -1);
+    stateManager.removeData("/removeData/foo", -1);
+    // test recursive listing and removal
+    stateManager.removeRecursively("/removeData/baz/1", false, false);
+    assertFalse(stateManager.hasData("/removeData/baz/1/2"));
+    assertTrue(stateManager.hasData("/removeData/baz/1"));
+    // should silently ignore the already-removed path
+    stateManager.removeRecursively("/removeData/baz/1/2", true, true);
+    stateManager.removeRecursively("/removeData/baz/1", false, true);
+    assertFalse(stateManager.hasData("/removeData/baz/1"));
+    try {
+      stateManager.removeRecursively("/removeData/baz/1", false, true);
+      fail("should throw exception - missing path");
+    } catch (NoSuchElementException e) {
+      // expected
+    }
+    stateManager.removeRecursively("/removeData", true, true);
+    assertFalse(stateManager.hasData("/removeData"));
+  }
+
+  @Test
+  public void testListData() throws Exception {
+    assertFalse(stateManager.hasData("/listData/foo"));
+    assertFalse(stateManager.hasData("/listData/foo/bar"));
+    try {
+      stateManager.createData("/listData/foo/bar", new byte[0], CreateMode.PERSISTENT);
+      fail("should not succeed");
+    } catch (NoSuchElementException e) {
+      // expected
+    }
+    try {
+      stateManager.listData("/listData/foo");
+      fail("should not succeed");
+    } catch (NoSuchElementException e) {
+      // expected
+    }
+    stateManager.makePath("/listData");
+    List<String> kids = stateManager.listData("/listData");
+    assertEquals(0, kids.size());
+    stateManager.makePath("/listData/foo");
+    kids = stateManager.listData("/listData");
+    assertEquals(1, kids.size());
+    assertEquals("foo", kids.get(0));
+    stateManager.createData("/listData/foo/bar", new byte[0], CreateMode.PERSISTENT);
+    stateManager.createData("/listData/foo/baz", new byte[0], CreateMode.PERSISTENT);
+    kids = stateManager.listData("/listData/foo");
+    assertEquals(2, kids.size());
+    assertTrue(kids.contains("bar"));
+    assertTrue(kids.contains("baz"));
+    try {
+      stateManager.createData("/listData/foo/bar", new byte[0], CreateMode.PERSISTENT);
+      fail("should not succeed");
+    } catch (AlreadyExistsException e) {
+      // expected
+    }
+  }
+
+  static final byte[] firstData = new byte[] {
+      (byte)0xca, (byte)0xfe, (byte)0xba, (byte)0xbe
+  };
+
+  static final byte[] secondData = new byte[] {
+      (byte)0xbe, (byte)0xba, (byte)0xfe, (byte)0xca
+  };
+
+  @Test
+  public void testCreateMode() throws Exception {
+    stateManager.makePath("/createMode");
+    stateManager.createData("/createMode/persistent", firstData, CreateMode.PERSISTENT);
+    stateManager.createData("/createMode/persistent_seq", firstData, CreateMode.PERSISTENT);
+    for (int i = 0; i < 10; i++) {
+      stateManager.createData("/createMode/persistent_seq/data", firstData, CreateMode.PERSISTENT_SEQUENTIAL);
+    }
+    // check what happens with gaps
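+    // sequential nodes get a zero-padded, monotonically increasing 10-digit suffix; deleting one leaves
+    // a gap and the counter keeps advancing (verified below: suffix 10 is gone, suffix 11 exists)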
+    stateManager.createData("/createMode/persistent_seq/data", firstData, CreateMode.PERSISTENT_SEQUENTIAL);
+    stateManager.removeData("/createMode/persistent_seq/data" + String.format(Locale.ROOT, "%010d", 10), -1);
+    stateManager.createData("/createMode/persistent_seq/data", firstData, CreateMode.PERSISTENT_SEQUENTIAL);
+
+    stateManager.createData("/createMode/ephemeral", firstData, CreateMode.EPHEMERAL);
+    stateManager.createData("/createMode/ephemeral_seq", firstData, CreateMode.PERSISTENT);
+    for (int i = 0; i < 10; i++) {
+      stateManager.createData("/createMode/ephemeral_seq/data", firstData, CreateMode.EPHEMERAL_SEQUENTIAL);
+    }
+    assertTrue(stateManager.hasData("/createMode"));
+    assertTrue(stateManager.hasData("/createMode/persistent"));
+    assertTrue(stateManager.hasData("/createMode/ephemeral"));
+    List<String> kids = stateManager.listData("/createMode/persistent_seq");
+    assertEquals(11, kids.size());
+    kids = stateManager.listData("/createMode/ephemeral_seq");
+    assertEquals(10, kids.size());
+    for (int i = 0; i < 10; i++) {
+      assertTrue(stateManager.hasData("/createMode/persistent_seq/data" + String.format(Locale.ROOT, "%010d", i)));
+    }
+    assertFalse(stateManager.hasData("/createMode/persistent_seq/data" + String.format(Locale.ROOT, "%010d", 10)));
+    assertTrue(stateManager.hasData("/createMode/persistent_seq/data" + String.format(Locale.ROOT, "%010d", 11)));
+
+    for (int i = 0; i < 10; i++) {
+      assertTrue(stateManager.hasData("/createMode/ephemeral_seq/data" + String.format(Locale.ROOT, "%010d", i)));
+    }
+    // check that ephemeral nodes disappear on disconnect
+    reInit();
+    assertTrue(stateManager.hasData("/createMode/persistent"));
+    for (int i = 0; i < 10; i++) {
+      assertTrue(stateManager.hasData("/createMode/persistent_seq/data" + String.format(Locale.ROOT, "%010d", i)));
+    }
+    assertTrue(stateManager.hasData("/createMode/persistent_seq/data" + String.format(Locale.ROOT, "%010d", 11)));
+
+    assertFalse(stateManager.hasData("/createMode/ephemeral"));
+    assertTrue(stateManager.hasData("/createMode/ephemeral_seq"));
+    kids = stateManager.listData("/createMode/ephemeral_seq");
+    assertEquals(0, kids.size());
+  }
+
+  static class OnceWatcher implements Watcher {
+    CountDownLatch triggered = new CountDownLatch(1);
+    WatchedEvent event;
+
+    @Override
+    public void process(WatchedEvent event) {
+      if (triggered.getCount() == 0) {
+        fail("Watch was already triggered once!");
+      }
+      triggered.countDown();
+      this.event = event;
+    }
+  }
+
+  @Test
+  public void testGetSetRemoveData() throws Exception {
+    stateManager.makePath("/getData");
+    stateManager.createData("/getData/persistentData", firstData, CreateMode.PERSISTENT);
+    OnceWatcher nodeWatcher = new OnceWatcher();
+    VersionedData vd = stateManager.getData("/getData/persistentData", nodeWatcher);
+    assertNotNull(vd);
+    assertEquals(0, vd.getVersion());
+    assertTrue(Arrays.equals(firstData, vd.getData()));
+
+    // update data, test versioning
+    try {
+      stateManager.setData("/getData/persistentData", secondData, 1);
+      fail("should have failed");
+    } catch (BadVersionException e) {
+      // expected
+    }
+    // watch should not have fired
+    assertEquals(1, nodeWatcher.triggered.getCount());
+
+    stateManager.setData("/getData/persistentData", secondData, 0);
+    if (!nodeWatcher.triggered.await(5, TimeUnit.SECONDS)) {
+      fail("Node watch should have fired!");
+    }
+    // watch should not fire now because it needs to be reset
+    stateManager.setData("/getData/persistentData", secondData, -1);
+
+    // create ephemeral node using another ZK connection
+    DistribStateManager ephemeralMgr = createDistribStateManager();
+    ephemeralMgr.createData("/getData/ephemeralData", firstData, CreateMode.EPHEMERAL);
+
+    nodeWatcher = new OnceWatcher();
+    vd = stateManager.getData("/getData/ephemeralData", nodeWatcher);
+    destroyDistribStateManager(ephemeralMgr);
+    if (!nodeWatcher.triggered.await(5, TimeUnit.SECONDS)) {
+      fail("Node watch should have fired!");
+    }
+    assertTrue(stateManager.hasData("/getData/persistentData"));
+    assertFalse(stateManager.hasData("/getData/ephemeralData"));
+
+    nodeWatcher = new OnceWatcher();
+    vd = stateManager.getData("/getData/persistentData", nodeWatcher);
+    // try wrong version
+    try {
+      stateManager.removeData("/getData/persistentData", vd.getVersion() - 1);
+      fail("should have failed");
+    } catch (BadVersionException e) {
+      // expected
+    }
+    // watch should not have fired
+    assertEquals(1, nodeWatcher.triggered.getCount());
+
+    stateManager.removeData("/getData/persistentData", vd.getVersion());
+    if (!nodeWatcher.triggered.await(5, TimeUnit.SECONDS)) {
+      fail("Node watch should have fired!");
+    }
+  }
+
+  @Test
+  public void testMulti() throws Exception {
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
new file mode 100644
index 0000000..d0d08fd
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+import com.google.common.collect.Lists;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.autoscaling.ActionContext;
+import org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest;
+import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
+import org.apache.solr.cloud.autoscaling.NodeLostTrigger;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.util.LogLevel;
+import org.apache.solr.common.util.TimeSource;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test for {@link ExecutePlanAction}
+ */
+@LogLevel("org.apache.solr.cloud=DEBUG")
+public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static final int NODE_COUNT = 2;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(NODE_COUNT, TimeSource.get("simTime:50"));
+  }
+
+  @After
+  public void printState() throws Exception {
+    log.info("-------------_ FINAL STATE --------------");
+    log.info("* Node values: " + Utils.toJSONString(cluster.getSimNodeStateProvider().simGetAllNodeValues()));
+    log.info("* Live nodes: " + cluster.getClusterStateProvider().getLiveNodes());
+    ClusterState state = cluster.getClusterStateProvider().getClusterState();
+    for (String coll: cluster.getSimClusterStateProvider().simListCollections()) {
+      log.info("* Collection " + coll + " state: " + state.getCollection(coll));
+    }
+
+  }
+
+  @Test
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
+  public void testExecute() throws Exception {
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String collectionName = "testExecute";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", 1, 2);
+    create.setMaxShardsPerNode(1);
+    create.process(solrClient);
+
+    log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
+
+    String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+    DocCollection docCollection = clusterState.getCollection(collectionName);
+    List<Replica> replicas = docCollection.getReplicas(sourceNodeName);
+    assertNotNull(replicas);
+    assertFalse(replicas.isEmpty());
+
+    List<String> otherNodes = cluster.getClusterStateProvider().getLiveNodes().stream()
+        .filter(node -> !node.equals(sourceNodeName)).collect(Collectors.toList());
+    assertFalse(otherNodes.isEmpty());
+    String survivor = otherNodes.get(0);
+
+    try (ExecutePlanAction action = new ExecutePlanAction()) {
+      action.configure(cluster.getLoader(), cluster, Collections.singletonMap("name", "execute_plan"));
+
+      // used to signal if we found that ExecutePlanAction did in fact create the right znode before executing the operation
+      AtomicBoolean znodeCreated = new AtomicBoolean(false);
+
+      CollectionAdminRequest.AsyncCollectionAdminRequest moveReplica = new CollectionAdminRequest.MoveReplica(collectionName, replicas.get(0).getName(), survivor);
+      CollectionAdminRequest.AsyncCollectionAdminRequest mockRequest = new CollectionAdminRequest.AsyncCollectionAdminRequest(CollectionParams.CollectionAction.OVERSEERSTATUS) {
+        @Override
+        public void setAsyncId(String asyncId) {
+          super.setAsyncId(asyncId);
+          String parentPath = ZkStateReader.SOLR_AUTOSCALING_TRIGGER_STATE_PATH + "/xyz/execute_plan";
+          try {
+            if (cluster.getDistribStateManager().hasData(parentPath)) {
+              java.util.List<String> children = cluster.getDistribStateManager().listData(parentPath);
+              if (!children.isEmpty()) {
+                String child = children.get(0);
+                VersionedData data = cluster.getDistribStateManager().getData(parentPath + "/" + child);
+                Map m = (Map) Utils.fromJSON(data.getData());
+                if (m.containsKey("requestid")) {
+                  znodeCreated.set(m.get("requestid").equals(asyncId));
+                }
+              }
+            }
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+
+        }
+      };
+      List<CollectionAdminRequest.AsyncCollectionAdminRequest> operations = Lists.asList(moveReplica, new CollectionAdminRequest.AsyncCollectionAdminRequest[]{mockRequest});
+      NodeLostTrigger.NodeLostEvent nodeLostEvent = new NodeLostTrigger.NodeLostEvent(TriggerEventType.NODELOST,
+          "mock_trigger_name", Collections.singletonList(TimeSource.CURRENT_TIME.getTimeNs()),
+          Collections.singletonList(sourceNodeName));
+      ActionContext actionContext = new ActionContext(cluster, null,
+          new HashMap<>(Collections.singletonMap("operations", operations)));
+      action.process(nodeLostEvent, actionContext);
+
+//      assertTrue("ExecutePlanAction should have stored the requestid in ZK before executing the request", znodeCreated.get());
+      List<NamedList<Object>> responses = (List<NamedList<Object>>) actionContext.getProperty("responses");
+      assertNotNull(responses);
+      assertEquals(2, responses.size());
+      NamedList<Object> response = responses.get(0);
+      assertNull(response.get("failure"));
+      assertNotNull(response.get("success"));
+    }
+
+    log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
+  }
+
+  @Test
+  public void testIntegration() throws Exception  {
+    SolrClient solrClient = cluster.simGetSolrClient();
+
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_lost_trigger'," +
+        "'event' : 'nodeLost'," +
+        "'waitFor' : '1s'," +
+        "'enabled' : true," +
+        "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+        "{'name':'execute_plan','class':'solr.ExecutePlanAction'}]" +
+        "}}";
+    SolrRequest req = AutoScalingHandlerTest.createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    String collectionName = "testIntegration";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", 1, 2);
+    create.setMaxShardsPerNode(1);
+    create.process(solrClient);
+
+    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+        collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
+
+    String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+    ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+    DocCollection docCollection = clusterState.getCollection(collectionName);
+    List<Replica> replicas = docCollection.getReplicas(sourceNodeName);
+    assertNotNull(replicas);
+    assertFalse(replicas.isEmpty());
+
+    List<String> otherNodes = cluster.getClusterStateProvider().getLiveNodes().stream()
+        .filter(node -> !node.equals(sourceNodeName)).collect(Collectors.toList());
+    assertFalse(otherNodes.isEmpty());
+    String survivor = otherNodes.get(0);
+
+    cluster.simRemoveNode(sourceNodeName, false);
+
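+    // allow the nodeLost trigger (waitFor: 1s) to fire and the resulting plan to execute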
+    cluster.getTimeSource().sleep(3000);
+
+    CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of collection to be 2 again",
+        collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
+
+    clusterState = cluster.getClusterStateProvider().getClusterState();
+    docCollection = clusterState.getCollection(collectionName);
+    List<Replica> replicasOnSurvivor = docCollection.getReplicas(survivor);
+    assertNotNull(replicasOnSurvivor);
+    assertEquals(2, replicasOnSurvivor.size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java
new file mode 100644
index 0000000..25e36f3
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud.autoscaling.sim;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.cloud.DistributedQueue;
+import org.apache.solr.client.solrj.cloud.DistribStateManager;
+
+/**
+ * Tests {@link GenericDistributedQueue} backed by a simulated {@link DistribStateManager}.
+ */
+@LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-02-26
+public class TestSimGenericDistributedQueue extends TestSimDistributedQueue {
+  DistribStateManager stateManager = new SimDistribStateManager();
+
+  @Override
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
+  protected DistributedQueue makeDistributedQueue(String dqZNode) throws Exception {
+    return new GenericDistributedQueue(stateManager, dqZNode);
+  }
+
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 09-Aug-2018
+  public void testDistributedQueue() throws Exception {
+    super.testDistributedQueue();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
new file mode 100644
index 0000000..decb585
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
@@ -0,0 +1,727 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.autoscaling.ActionContext;
+import org.apache.solr.cloud.autoscaling.ComputePlanAction;
+import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
+import org.apache.solr.cloud.autoscaling.SearchRateTrigger;
+import org.apache.solr.cloud.autoscaling.TriggerActionBase;
+import org.apache.solr.cloud.autoscaling.TriggerEvent;
+import org.apache.solr.cloud.autoscaling.TriggerListenerBase;
+import org.apache.solr.cloud.autoscaling.CapturedEvent;
+import org.apache.solr.cloud.autoscaling.TriggerValidationException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Pair;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.LogLevel;
+import org.apache.solr.util.TimeOut;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
+
+/**
+ * Autoscaling trigger tests (nodeLost, nodeAdded, searchRate) run against a large
+ * simulated cluster of {@link #NUM_NODES} nodes at {@link #SPEED}x simulated time.
+ */
+@TimeoutSuite(millis = 4 * 3600 * 1000)
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
+@ThreadLeakLingering(linger = 20000) // ComputePlanAction may take significant time to complete
+//05-Jul-2018 @LuceneTestCase.BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12075")
+public class TestSimLargeCluster extends SimSolrCloudTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
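+  // acceleration factor for the simulated TimeSource ("simTime:" + SPEED):
+  // simulated time flows 50x faster than wall-clock time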
+  public static final int SPEED = 50;
+
+  public static final int NUM_NODES = 100;
+
+  static Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();
+  static AtomicInteger triggerFinishedCount = new AtomicInteger();
+  static AtomicInteger triggerStartedCount = new AtomicInteger();
+  static CountDownLatch triggerStartedLatch;
+  static CountDownLatch triggerFinishedLatch;
+  static int waitForSeconds;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(NUM_NODES, TimeSource.get("simTime:" + SPEED));
+  }
+
+  @Before
+  public void setupTest() throws Exception {
+    waitForSeconds = 5;
+    triggerStartedCount.set(0);
+    triggerFinishedCount.set(0);
+    triggerStartedLatch = new CountDownLatch(1);
+    triggerFinishedLatch = new CountDownLatch(1);
+    listenerEvents.clear();
+    // disable .scheduled_maintenance and .auto_add_replicas
+    String suspendTriggerCommand = "{" +
+        "'suspend-trigger' : {'name' : '.scheduled_maintenance'}" +
+        "}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
+    SolrClient solrClient = cluster.simGetSolrClient();
+    NamedList<Object> response;
+    try {
+      response = solrClient.request(req);
+      assertEquals(response.get("result").toString(), "success");
+    } catch (Exception e) {
+      if (!e.toString().contains("No trigger exists")) {
+        throw e;
+      }
+    }
+    suspendTriggerCommand = "{" +
+        "'suspend-trigger' : {'name' : '.auto_add_replicas'}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
+    try {
+      response = solrClient.request(req);
+      assertEquals(response.get("result").toString(), "success");
+    } catch (Exception e) {
+      if (!e.toString().contains("No trigger exists")) {
+        throw e;
+      }
+    }
+
+    // do this in advance if missing
+    if (!cluster.getSimClusterStateProvider().simListCollections().contains(CollectionAdminParams.SYSTEM_COLL)) {
+      cluster.getSimClusterStateProvider().createSystemCollection();
+      CloudTestUtils.waitForState(cluster, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
+          CloudTestUtils.clusterShape(1, 1, false, true));
+    }
+  }
+
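+  // records every event/stage notification in listenerEvents, keyed by the listener's configured name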
+  public static class TestTriggerListener extends TriggerListenerBase {
+    @Override
+    public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, AutoScalingConfig.TriggerListenerConfig config) throws TriggerValidationException {
+      super.configure(loader, cloudManager, config);
+    }
+
+    @Override
+    public synchronized void onEvent(TriggerEvent event, TriggerEventProcessorStage stage, String actionName,
+                                     ActionContext context, Throwable error, String message) {
+      List<CapturedEvent> lst = listenerEvents.computeIfAbsent(config.name, s -> new ArrayList<>());
+      lst.add(new CapturedEvent(cluster.getTimeSource().getTimeNs(), context, config, stage, actionName, event, message));
+    }
+  }
+
+  public static class FinishTriggerAction extends TriggerActionBase {
+    @Override
+    public void process(TriggerEvent event, ActionContext context) throws Exception {
+      triggerFinishedCount.incrementAndGet();
+      triggerFinishedLatch.countDown();
+    }
+  }
+
+  public static class StartTriggerAction extends TriggerActionBase {
+    @Override
+    public void process(TriggerEvent event, ActionContext context) throws Exception {
+      triggerStartedLatch.countDown();
+      triggerStartedCount.incrementAndGet();
+    }
+  }
+
+  @Test
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
+  public void testBasic() throws Exception {
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_lost_trigger1'," +
+        "'event' : 'nodeLost'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'enabled' : true," +
+        "'actions' : [" +
+        "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+        "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    String setListenerCommand = "{" +
+        "'set-listener' : " +
+        "{" +
+        "'name' : 'foo'," +
+        "'trigger' : 'node_lost_trigger1'," +
+        "'stage' : ['STARTED','ABORTED','SUCCEEDED', 'FAILED']," +
+        "'beforeAction' : ['compute', 'execute']," +
+        "'afterAction' : ['compute', 'execute']," +
+        "'class' : '" + TestTriggerListener.class.getName() + "'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    cluster.getTimeSource().sleep(5000);
+
+    // pick a subset of the live nodes (shuffled below) to host the collection
+    List<String> nodes = new ArrayList<>();
+    int limit = 75;
+    for (String node : cluster.getClusterStateProvider().getLiveNodes()) {
+      nodes.add(node);
+      if (nodes.size() >= limit) {
+        break;
+      }
+    }
+    Collections.shuffle(nodes, random());
+    // create collection on these nodes
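+    // 5 shards, each with 5 NRT + 5 TLOG + 5 PULL replicas - clusterShape(5, 15) below checks 15 replicas per shard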
+    String collectionName = "testBasic";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", 5, 5, 5, 5);
+    create.setMaxShardsPerNode(1);
+    create.setAutoAddReplicas(false);
+    create.setCreateNodeSet(String.join(",", nodes));
+    create.process(solrClient);
+
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(5, 15, false, true)) + " ms");
+
+    int KILL_NODES = 8;
+    // kill off a number of nodes
+    for (int i = 0; i < KILL_NODES; i++) {
+      cluster.simRemoveNode(nodes.get(i), false);
+    }
+    // should fully recover
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(5, 15, false, true)) + " ms");
+
+    log.info("OP COUNTS: " + cluster.simGetOpCounts());
+    long moveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
+
+    // simulate a number of flaky nodes
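+    // each of 10 rounds kills FLAKY_NODES nodes, waits 2x the trigger's waitFor, then restores them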
+    int FLAKY_NODES = 10;
+    int flakyReplicas = 0;
+    for (int cnt = 0; cnt < 10; cnt++) {
+      for (int i = KILL_NODES; i < KILL_NODES + FLAKY_NODES; i++) {
+        flakyReplicas += cluster.getSimClusterStateProvider().simGetReplicaInfos(nodes.get(i))
+            .stream().filter(r -> r.getState().equals(Replica.State.ACTIVE)).count();
+        cluster.simRemoveNode(nodes.get(i), false);
+      }
+      cluster.getTimeSource().sleep(TimeUnit.SECONDS.toMillis(waitForSeconds) * 2);
+      for (int i = KILL_NODES; i < KILL_NODES + FLAKY_NODES; i++) {
+        final String nodeId = nodes.get(i);
+        cluster.submit(() -> cluster.getSimClusterStateProvider().simRestoreNode(nodeId));
+      }
+    }
+
+    // wait until started == finished
+    TimeOut timeOut = new TimeOut(20 * waitForSeconds * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
+    while (!timeOut.hasTimedOut()) {
+      if (triggerStartedCount.get() == triggerFinishedCount.get()) {
+        break;
+      }
+      timeOut.sleep(1000);
+    }
+    if (timeOut.hasTimedOut()) {
+      fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
+    }
+
+
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(5, 15, false, true)) + " ms");
+    long newMoveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
+    log.info("==== Flaky replicas: {}. Additional MOVEREPLICA count: {}", flakyReplicas, (newMoveReplicaOps - moveReplicaOps));
+    // flaky nodes lead to a number of MOVEREPLICA that is non-zero but lower than the number of flaky replicas
+    assertTrue("there should be new MOVERPLICA ops", newMoveReplicaOps - moveReplicaOps > 0);
+    assertTrue("there should be less than flakyReplicas=" + flakyReplicas + " MOVEREPLICA ops",
+        newMoveReplicaOps - moveReplicaOps < flakyReplicas);
+  }
+
+  @Test
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
+  public void testAddNode() throws Exception {
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_added_trigger2'," +
+        "'event' : 'nodeAdded'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'enabled' : true," +
+        "'actions' : [" +
+        "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+        "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    // create a collection with more than 1 replica per node
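+    // NUM_NODES/10 = 10 shards, each with NUM_NODES/8 = 12 replicas of each type (NRT/TLOG/PULL),
+    // i.e. NUM_NODES/8 * 3 = 36 replicas per shard, as checked by clusterShape below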
+    String collectionName = "testNodeAdded";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", NUM_NODES / 10, NUM_NODES / 8, NUM_NODES / 8, NUM_NODES / 8);
+    create.setMaxShardsPerNode(5);
+    create.setAutoAddReplicas(false);
+    create.process(solrClient);
+
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
+
+    // start adding nodes
+    int numAddNode = NUM_NODES / 5;
+    List<String> addNodesList = new ArrayList<>(numAddNode);
+    for (int i = 0; i < numAddNode; i++) {
+      addNodesList.add(cluster.simAddNode());
+      cluster.getTimeSource().sleep(5000);
+    }
+    // wait until at least one event is generated
+    boolean await = triggerStartedLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
+    assertTrue("trigger did not fire", await);
+
+    // wait until started == finished
+    TimeOut timeOut = new TimeOut(20 * waitForSeconds * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
+    while (!timeOut.hasTimedOut()) {
+      if (triggerStartedCount.get() == triggerFinishedCount.get()) {
+        break;
+      }
+      timeOut.sleep(1000);
+    }
+    if (timeOut.hasTimedOut()) {
+      fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
+    }
+
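+    // scan the simulated .system collection (the autoscaling event history) for our trigger's STARTED event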
+    List<SolrInputDocument> systemColl = cluster.simGetSystemCollection();
+    int startedEventPos = -1;
+    for (int i = 0; i < systemColl.size(); i++) {
+      SolrInputDocument d = systemColl.get(i);
+      if (!"node_added_trigger2".equals(d.getFieldValue("event.source_s"))) {
+        continue;
+      }
+      if ("NODEADDED".equals(d.getFieldValue("event.type_s")) &&
+          "STARTED".equals(d.getFieldValue("stage_s"))) {
+        startedEventPos = i;
+        break;
+      }
+    }
+    assertTrue("no STARTED event", startedEventPos > -1);
+    SolrInputDocument startedEvent = systemColl.get(startedEventPos);
+    int lastIgnoredPos = startedEventPos;
+    // make sure some replicas have been moved
+    assertTrue("no MOVEREPLICA ops?", cluster.simGetOpCount("MOVEREPLICA") > 0);
+
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
+
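+    // poll until the MOVEREPLICA op count stops changing, then look for the trigger's SUCCEEDED event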
+    int count = 50;
+    SolrInputDocument finishedEvent = null;
+    long lastNumOps = cluster.simGetOpCount("MOVEREPLICA");
+    while (count-- > 0) {
+      cluster.getTimeSource().sleep(10000);
+      long currentNumOps = cluster.simGetOpCount("MOVEREPLICA");
+      if (currentNumOps == lastNumOps) {
+        int size = systemColl.size() - 1;
+        for (int i = size; i > lastIgnoredPos; i--) {
+          SolrInputDocument d = systemColl.get(i);
+          if (!"node_added_trigger2".equals(d.getFieldValue("event.source_s"))) {
+            continue;
+          }
+          if ("SUCCEEDED".equals(d.getFieldValue("stage_s"))) {
+            finishedEvent = d;
+            break;
+          }
+        }
+        break;
+      } else {
+        lastNumOps = currentNumOps;
+      }
+    }
+
+    assertTrue("did not finish processing changes", finishedEvent != null);
+    long delta = (Long)finishedEvent.getFieldValue("event.time_l") - (Long)startedEvent.getFieldValue("event.time_l");
+    log.info("#### System stabilized after " + TimeUnit.NANOSECONDS.toMillis(delta) + " ms");
+    assertTrue("unexpected number of MOVEREPLICA ops", cluster.simGetOpCount("MOVEREPLICA") > 1);
+  }
+
+  @Test
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
+  public void testNodeLost() throws Exception {
+    doTestNodeLost(waitForSeconds, 5000, 0);
+  }
+
+  // Renard R5 series (rounded steps of 10^(1/5), ratio ~1.58) - evenly covers a log10 range
+  private static final int[] renard5 = new int[] {
+      1, 2, 3, 4, 6,
+      10
+  };
+  private static final int[] renard5x = new int[] {
+      1, 2, 3, 4, 6,
+      10, 16, 25, 40, 63,
+      100
+  };
+  private static final int[] renard5xx = new int[] {
+      1, 2, 3, 4, 6,
+      10, 16, 25, 40, 63,
+      100, 158, 251, 398, 631,
+      1000, 1585, 2512, 3981, 6310,
+      10000
+  };
+  // Renard R10 series
+  private static final double[] renard10 = new double[] {
+      1, 1.3, 1.6, 2, 2.5, 3.2, 4, 5, 6.3, 7.9,
+      10
+  };
+  private static final double[] renard10x = new double[] {
+      1, 1.3, 1.6, 2, 2.5, 3.2, 4, 5, 6.3, 7.9,
+      10, 12.6, 15.8, 20, 25.1, 31.6, 39.8, 50.1, 63.1, 79.4,
+      100
+  };
+
+  private static final AtomicInteger ZERO = new AtomicInteger(0);
+
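+  // benchmark, run manually (note the commented-out @Test): sweeps trigger waitFor and kill delay
+  // over the R5x series, repeating each combination 5 times on a fresh cluster, and logs summary
+  // statistics of stabilization time and started/ignored event counts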
+  //@Test
+  public void benchmarkNodeLost() throws Exception {
+    List<String> results = new ArrayList<>();
+    for (int wait : renard5x) {
+      for (int delay : renard5x) {
+        SummaryStatistics totalTime = new SummaryStatistics();
+        SummaryStatistics ignoredOurEvents = new SummaryStatistics();
+        SummaryStatistics ignoredOtherEvents = new SummaryStatistics();
+        SummaryStatistics startedOurEvents = new SummaryStatistics();
+        SummaryStatistics startedOtherEvents = new SummaryStatistics();
+        for (int i = 0; i < 5; i++) {
+          if (cluster != null) {
+            cluster.close();
+          }
+          setupCluster();
+          setUp();
+          setupTest();
+          long total = doTestNodeLost(wait, delay * 1000, 0);
+          totalTime.addValue(total);
+          // get event counts
+          Map<String, Map<String, AtomicInteger>> counts = cluster.simGetEventCounts();
+          Map<String, AtomicInteger> map = counts.remove("node_lost_trigger");
+          startedOurEvents.addValue(map.getOrDefault("STARTED", ZERO).get());
+          ignoredOurEvents.addValue(map.getOrDefault("IGNORED", ZERO).get());
+          int otherStarted = 0;
+          int otherIgnored = 0;
+          for (Map<String, AtomicInteger> m : counts.values()) {
+            otherStarted += m.getOrDefault("STARTED", ZERO).get();
+            otherIgnored += m.getOrDefault("IGNORED", ZERO).get();
+          }
+          startedOtherEvents.addValue(otherStarted);
+          ignoredOtherEvents.addValue(otherIgnored);
+        }
+        results.add(String.format(Locale.ROOT, "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f",
+            wait, delay, startedOurEvents.getMean(), ignoredOurEvents.getMean(),
+            startedOtherEvents.getMean(), ignoredOtherEvents.getMean(),
+            totalTime.getMin(), totalTime.getMax(), totalTime.getMean(), totalTime.getStandardDeviation(), totalTime.getVariance()));
+      }
+    }
+    log.info("===== RESULTS ======");
+    log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar");
+    results.forEach(s -> log.info(s));
+  }
+
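+  /**
+   * Kill NUM_NODES/5 nodes with a delay between kills and verify that the nodeLost trigger
+   * eventually moves the affected replicas.
+   * @param waitFor the trigger's waitFor, in seconds
+   * @param killDelay simulated pause between node kills, in ms
+   * @param minIgnored minimum number of IGNORED events expected in the event history
+   * @return time in ms between the STARTED and the final SUCCEEDED event
+   */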
+  private long doTestNodeLost(int waitFor, long killDelay, int minIgnored) throws Exception {
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'node_lost_trigger3'," +
+        "'event' : 'nodeLost'," +
+        "'waitFor' : '" + waitFor + "s'," +
+        "'enabled' : true," +
+        "'actions' : [" +
+        "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+        "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    String setListenerCommand = "{" +
+        "'set-listener' : " +
+        "{" +
+        "'name' : 'failures'," +
+        "'trigger' : 'node_lost_trigger3'," +
+        "'stage' : ['FAILED']," +
+        "'class' : '" + TestTriggerListener.class.getName() + "'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+
+    // create a collection with 1 replica per node
+    String collectionName = "testNodeLost";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", NUM_NODES / 5, NUM_NODES / 10);
+    create.setMaxShardsPerNode(5);
+    create.setAutoAddReplicas(false);
+    create.process(solrClient);
+
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
+
+    // start killing nodes
+    int numNodes = NUM_NODES / 5;
+    List<String> nodes = new ArrayList<>(cluster.getLiveNodesSet().get());
+    for (int i = 0; i < numNodes; i++) {
+      // this may also select a node where a replica is moved to, so the total number of
+      // MOVEREPLICA may vary
+      cluster.simRemoveNode(nodes.get(i), false);
+      cluster.getTimeSource().sleep(killDelay);
+    }
+    // wait for the trigger to fire and complete at least once
+    boolean await = triggerFinishedLatch.await(20 * waitFor * 1000 / SPEED, TimeUnit.MILLISECONDS);
+    assertTrue("trigger did not fire within timeout, " +
+        "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+        await);
+    List<SolrInputDocument> systemColl = cluster.simGetSystemCollection();
+    int startedEventPos = -1;
+    for (int i = 0; i < systemColl.size(); i++) {
+      SolrInputDocument d = systemColl.get(i);
+      if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
+        continue;
+      }
+      if ("NODELOST".equals(d.getFieldValue("event.type_s")) &&
+          "STARTED".equals(d.getFieldValue("stage_s"))) {
+        startedEventPos = i;
+        break;
+      }
+    }
+    assertTrue("no STARTED event: " + systemColl + ", " +
+            "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+          startedEventPos > -1);
+    SolrInputDocument startedEvent = systemColl.get(startedEventPos);
+    // we can expect some failures when target node in MOVEREPLICA has been killed
+    // between when the event processing started and the actual moment of MOVEREPLICA execution
+    // wait until started == (finished + failed)
+    TimeOut timeOut = new TimeOut(20 * waitFor * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
+    while (!timeOut.hasTimedOut()) {
+      if (triggerStartedCount.get() == triggerFinishedCount.get()) {
+        break;
+      }
+      log.debug("started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get() +
+          ", failed=" + listenerEvents.size());
+      timeOut.sleep(1000);
+    }
+    if (timeOut.hasTimedOut()) {
+      if (triggerStartedCount.get() > triggerFinishedCount.get() + listenerEvents.size()) {
+        fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get() +
+            ", failed=" + listenerEvents.size());
+      }
+    }
+    int ignored = 0;
+    int lastIgnoredPos = startedEventPos;
+    for (int i = startedEventPos + 1; i < systemColl.size(); i++) {
+      SolrInputDocument d = systemColl.get(i);
+      if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
+        continue;
+      }
+      if ("NODELOST".equals(d.getFieldValue("event.type_s"))) {
+        if ("IGNORED".equals(d.getFieldValue("stage_s"))) {
+          ignored++;
+          lastIgnoredPos = i;
+        }
+      }
+    }
+    assertTrue("should be at least " + minIgnored + " IGNORED events, " +
+            "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+            ignored >= minIgnored);
+    // make sure some replicas have been moved
+    assertTrue("no MOVEREPLICA ops? " +
+            "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+            cluster.simGetOpCount("MOVEREPLICA") > 0);
+
+    if (listenerEvents.isEmpty()) {
+      // no failed movements - verify collection shape
+      log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+          CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
+    } else {
+      cluster.getTimeSource().sleep(NUM_NODES * 100);
+    }
+
+    int count = 50;
+    SolrInputDocument finishedEvent = null;
+    long lastNumOps = cluster.simGetOpCount("MOVEREPLICA");
+    while (count-- > 0) {
+      cluster.getTimeSource().sleep(waitFor * 10000);
+      long currentNumOps = cluster.simGetOpCount("MOVEREPLICA");
+      if (currentNumOps == lastNumOps) {
+        int size = systemColl.size() - 1;
+        for (int i = size; i > lastIgnoredPos; i--) {
+          SolrInputDocument d = systemColl.get(i);
+          if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
+            continue;
+          }
+          if ("SUCCEEDED".equals(d.getFieldValue("stage_s"))) {
+            finishedEvent = d;
+            break;
+          }
+        }
+        break;
+      } else {
+        lastNumOps = currentNumOps;
+      }
+    }
+
+    assertTrue("did not finish processing changes, " +
+            "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+            finishedEvent != null);
+    long delta = (Long)finishedEvent.getFieldValue("event.time_l") - (Long)startedEvent.getFieldValue("event.time_l");
+    delta = TimeUnit.NANOSECONDS.toMillis(delta);
+    log.info("#### System stabilized after " + delta + " ms");
+    long ops = cluster.simGetOpCount("MOVEREPLICA");
+    long expectedMinOps = 40;
+    if (!listenerEvents.isEmpty()) {
+      expectedMinOps = 20;
+    }
+    assertTrue("unexpected number (" + expectedMinOps + ") of MOVEREPLICA ops: " + ops + ", " +
+            "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+            ops >= expectedMinOps);
+    return delta;
+  }
+
+  @Test
+  //commented 2-Aug-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
+  public void testSearchRate() throws Exception {
+    SolrClient solrClient = cluster.simGetSolrClient();
+    String collectionName = "testSearchRate";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", 2, 10);
+    create.process(solrClient);
+
+    log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(2, 10, false, true)) + " ms");
+
+    // collect the node names for shard1
+    Set<String> nodes = new HashSet<>();
+    cluster.getSimClusterStateProvider().getClusterState().getCollection(collectionName)
+        .getSlice("shard1")
+        .getReplicas()
+        .forEach(r -> nodes.add(r.getNodeName()));
+
+    String metricName = "QUERY./select.requestTimes:1minRate";
+    // simulate search traffic
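+    // a shard-level 1minRate of 40 across shard1's 10 replicas comes to 4.0 per replica,
+    // which the assertions on HOT_NODES below depend on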
+    cluster.getSimClusterStateProvider().simSetShardValue(collectionName, "shard1", metricName, 40, false, true);
+
+    // now define the trigger. doing it earlier may cause partial events to be generated (where only some
+    // nodes / replicas exceeded the threshold).
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'search_rate_trigger'," +
+        "'event' : 'searchRate'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'aboveRate' : 1.0," +
+        "'aboveNodeRate' : 1.0," +
+        "'enabled' : true," +
+        "'actions' : [" +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+        "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+    String setListenerCommand1 = "{" +
+        "'set-listener' : " +
+        "{" +
+        "'name' : 'srt'," +
+        "'trigger' : 'search_rate_trigger'," +
+        "'stage' : ['FAILED','SUCCEEDED']," +
+        "'class' : '" + TestTriggerListener.class.getName() + "'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand1);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+
+    boolean await = triggerFinishedLatch.await(waitForSeconds * 20000 / SPEED, TimeUnit.MILLISECONDS);
+    assertTrue("The trigger did not fire at all", await);
+    // wait for listener to capture the SUCCEEDED stage
+    cluster.getTimeSource().sleep(2000);
+    assertEquals(listenerEvents.toString(), 1, listenerEvents.get("srt").size());
+    CapturedEvent ev = listenerEvents.get("srt").get(0);
+    assertEquals(TriggerEventType.SEARCHRATE, ev.event.getEventType());
+    Map<String, Number> m = (Map<String, Number>)ev.event.getProperty(SearchRateTrigger.HOT_NODES);
+    assertNotNull(m);
+    assertEquals(nodes.size(), m.size());
+    assertEquals(nodes, m.keySet());
+    m.forEach((k, v) -> assertEquals(4.0, v.doubleValue(), 0.01));
+    List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+    assertNotNull(ops);
+    assertEquals(ops.toString(), 1, ops.size());
+    ops.forEach(op -> {
+      assertEquals(CollectionParams.CollectionAction.ADDREPLICA, op.getAction());
+      assertEquals(1, op.getHints().size());
+      Object o = op.getHints().get(Suggester.Hint.COLL_SHARD);
+      // this may be a pair or a HashSet of pairs with size 1
+      Pair<String, String> hint = null;
+      if (o instanceof Pair) {
+        hint = (Pair<String, String>)o;
+      } else if (o instanceof Set) {
+        assertEquals("unexpected number of hints: " + o, 1, ((Set)o).size());
+        o = ((Set)o).iterator().next();
+        assertTrue("unexpected hint: " + o, o instanceof Pair);
+        hint = (Pair<String, String>)o;
+      } else {
+        fail("unexpected hints: " + o);
+      }
+      assertNotNull(hint);
+      assertEquals(collectionName, hint.first());
+      assertEquals("shard1", hint.second());
+    });
+  }
+}