You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/08/27 13:15:26 UTC
[4/6] lucene-solr:branch_7x: SOLR-12669: Rename tests that use the
autoscaling simulation framework.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
new file mode 100644
index 0000000..40ca91b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimClusterStateProvider.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
+import org.apache.solr.client.solrj.cloud.autoscaling.Preference;
+import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.cloud.rule.ImplicitSnitch;
+import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.common.util.Utils;
+import org.apache.zookeeper.Watcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This test compares the cluster state of a real cluster and a simulated one.
+ */
+public class TestSimClusterStateProvider extends SolrCloudTestCase {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ // number of nodes passed to configureCluster() in setupCluster()
+ private static int NODE_COUNT = 3;
+ private static boolean simulated; // picked at random in setupCluster(); true = tests run against a SimCloudManager
+ // the manager exercised by the tests: a SimCloudManager when simulated, otherwise the real cluster's manager
+ private static SolrCloudManager cloudManager;
+ // data captured from the real cluster in init()
+ private static Collection<String> liveNodes;
+ private static Map<String, Object> clusterProperties;
+ private static AutoScalingConfig autoScalingConfig;
+ private static Map<String, Map<String, Map<String, List<ReplicaInfo>>>> replicas; // outer key: node name
+ private static Map<String, Map<String, Object>> nodeValues; // key: node name
+ private static ClusterState realState;
+
+ // set up a real cluster as the source of test data
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+   // decide once per class whether the tests use the simulated or the real cloud manager
+   simulated = random().nextBoolean();
+   log.info("####### Using simulated components? " + simulated);
+
+   configureCluster(NODE_COUNT).addConfig("conf", configset("cloud-minimal")).configure();
+
+   // create the system collection on the real cluster before its state is captured
+   CollectionAdminRequest.Create createSystemCollection =
+       CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL, null, 1, 2, 0, 1);
+   createSystemCollection.process(cluster.getSolrClient());
+
+   init();
+ }
+
+ /**
+  * Closes the cloud manager when it is the simulated one and clears the static
+  * state populated by {@code init()}.
+  * <p>
+  * Only the simulated manager is closed here: it is created by this test, whereas the
+  * real manager is obtained from a node of the test cluster.
+  */
+ @AfterClass
+ public static void closeCloudManager() throws Exception {
+   try {
+     if (simulated && cloudManager != null) {
+       cloudManager.close();
+     }
+   } finally {
+     // drop the static references so the captured cluster data is not retained
+     // once this test class has finished
+     cloudManager = null;
+     liveNodes = null;
+     clusterProperties = null;
+     autoScalingConfig = null;
+     replicas = null;
+     nodeValues = null;
+     realState = null;
+   }
+ }
+
+ /**
+  * Captures live nodes, cluster properties, autoscaling config, per-node replica info,
+  * per-node values and the cluster state from the real cluster, and selects the manager
+  * used by the tests.
+  * <p>
+  * When {@code simulated} is set, a {@link SimCloudManager} is seeded with the captured
+  * data and its cluster state is asserted to equal the real one; otherwise the real
+  * manager is used directly.
+  */
+ private static void init() throws Exception {
+   // the manager is taken from the last Jetty node of the cluster
+   SolrCloudManager realManager = cluster.getJettySolrRunner(cluster.getJettySolrRunners().size() - 1).getCoreContainer()
+       .getZkController().getSolrCloudManager();
+   liveNodes = realManager.getClusterStateProvider().getLiveNodes();
+   clusterProperties = realManager.getClusterStateProvider().getClusterProperties();
+   autoScalingConfig = realManager.getDistribStateManager().getAutoScalingConfig();
+   replicas = new HashMap<>();
+   nodeValues = new HashMap<>();
+   liveNodes.forEach(n -> {
+     replicas.put(n, realManager.getNodeStateProvider().getReplicaInfo(n, Collections.emptySet()));
+     nodeValues.put(n, realManager.getNodeStateProvider().getNodeValues(n, ImplicitSnitch.tags));
+   });
+   realState = realManager.getClusterStateProvider().getClusterState();
+
+   if (simulated) {
+     // initialize simulated provider
+     SimCloudManager simCloudManager = new SimCloudManager(TimeSource.get("simTime:10"));
+     // publish the manager right away so that closeCloudManager() can still close it
+     // if any of the setup steps or the assertion below fails
+     cloudManager = simCloudManager;
+     simCloudManager.getSimClusterStateProvider().simSetClusterProperties(clusterProperties);
+     simCloudManager.getSimDistribStateManager().simSetAutoScalingConfig(autoScalingConfig);
+     nodeValues.forEach((n, values) -> {
+       try {
+         simCloudManager.getSimNodeStateProvider().simSetNodeValues(n, values);
+       } catch (InterruptedException e) {
+         // keep the interrupt status visible to the caller before failing the setup
+         Thread.currentThread().interrupt();
+         fail("Interrupted:" + e);
+       }
+     });
+     simCloudManager.getSimClusterStateProvider().simSetClusterState(realState);
+     ClusterState simState = simCloudManager.getClusterStateProvider().getClusterState();
+     assertClusterStateEquals(realState, simState);
+   } else {
+     cloudManager = realManager;
+   }
+ }
+
+ /**
+  * Asserts that two cluster states have equal live nodes, the same collection names,
+  * the same slice names per collection, and that every replica of {@code one} has an
+  * equal replica of the same name in the corresponding slice of {@code two}.
+  */
+ private static void assertClusterStateEquals(ClusterState one, ClusterState two) {
+   assertEquals(one.getLiveNodes(), two.getLiveNodes());
+   assertEquals(one.getCollectionsMap().keySet(), two.getCollectionsMap().keySet());
+   one.forEachCollection(expectedColl -> {
+     DocCollection actualColl = two.getCollection(expectedColl.getName());
+     Map<String, Slice> expectedSlices = expectedColl.getSlicesMap();
+     Map<String, Slice> actualSlices = actualColl.getSlicesMap();
+     assertEquals(expectedSlices.keySet(), actualSlices.keySet());
+     for (Map.Entry<String, Slice> entry : expectedSlices.entrySet()) {
+       Slice actualSlice = actualSlices.get(entry.getKey());
+       // replicas are looked up by name in the other state's slice
+       entry.getValue().getReplicas().forEach(expectedReplica -> {
+         Replica actualReplica = actualSlice.getReplica(expectedReplica.getName());
+         assertNotNull(actualReplica);
+         assertEquals(expectedReplica, actualReplica);
+       });
+     }
+   });
+ }
+
+ /**
+  * Starts a new Jetty node in the real cluster and, when running simulated,
+  * also adds a node with the same name to the simulated cluster state.
+  *
+  * @return the node name of the started node
+  */
+ private String addNode() throws Exception {
+   final String nodeId = cluster.startJettySolrRunner().getNodeName();
+   if (!simulated) {
+     return nodeId;
+   }
+   SimCloudManager simManager = (SimCloudManager) cloudManager;
+   simManager.getSimClusterStateProvider().simAddNode(nodeId);
+   return nodeId;
+ }
+
+ /**
+  * Stops the first Jetty node of the real cluster and, when running simulated,
+  * also removes the node with the same name from the simulated cluster state.
+  *
+  * @return the node name of the stopped node
+  */
+ private String deleteNode() throws Exception {
+   // read the name before stopping the runner at index 0
+   final String nodeId = cluster.getJettySolrRunner(0).getNodeName();
+   cluster.stopJettySolrRunner(0);
+   if (!simulated) {
+     return nodeId;
+   }
+   SimCloudManager simManager = (SimCloudManager) cloudManager;
+   simManager.getSimClusterStateProvider().simRemoveNode(nodeId);
+   return nodeId;
+ }
+
+ /**
+  * Writes the given config as JSON to {@link ZkStateReader#SOLR_AUTOSCALING_CONF_PATH}
+  * through the ZK client of the first Jetty node and, when running simulated,
+  * also sets it on the simulated state manager.
+  */
+ private void setAutoScalingConfig(AutoScalingConfig cfg) throws Exception {
+   final byte[] json = Utils.toJSON(cfg);
+   cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getZkClient()
+       .setData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH, json, -1, true);
+   if (!simulated) {
+     return;
+   }
+   SimCloudManager simManager = (SimCloudManager) cloudManager;
+   simManager.getSimDistribStateManager().simSetAutoScalingConfig(cfg);
+ }
+
+ /**
+  * Checks that the live nodes reported by the cluster state provider and the children
+  * listed under {@link ZkStateReader#LIVE_NODES_ZKNODE} agree initially, after adding
+  * a node and after removing a node.
+  */
+ @Test
+ public void testAddRemoveNode() throws Exception {
+   Set<String> lastNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
+   assertZkLiveNodesMatch(lastNodes);
+
+   String node = addNode();
+   cloudManager.getTimeSource().sleep(2000);
+   // the previous snapshot must not know the node, the fresh one must
+   assertFalse(lastNodes.contains(node));
+   lastNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
+   assertTrue(lastNodes.contains(node));
+   assertZkLiveNodesMatch(lastNodes);
+
+   node = deleteNode();
+   cloudManager.getTimeSource().sleep(2000);
+   // the previous snapshot still contains the node, the fresh one must not
+   assertTrue(lastNodes.contains(node));
+   lastNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
+   assertFalse(lastNodes.contains(node));
+   assertZkLiveNodesMatch(lastNodes);
+ }
+
+ /**
+  * Asserts that the entries listed under the live-nodes path have the same size as
+  * {@code expected} and that none of them is missing from {@code expected}.
+  */
+ private static void assertZkLiveNodesMatch(Set<String> expected) throws Exception {
+   List<String> listed = cloudManager.getDistribStateManager().listData(ZkStateReader.LIVE_NODES_ZKNODE);
+   assertEquals(expected.size(), listed.size());
+   listed.removeAll(expected);
+   assertTrue(listed.isEmpty());
+ }
+
+ /**
+  * Checks that the autoscaling config read through the state manager equals the captured
+  * one, that a watcher registered on it is triggered by an update, and that updated and
+  * restored configs are read back unchanged.
+  */
+ @Test
+ public void testAutoScalingConfig() throws Exception {
+   final CountDownLatch triggered = new CountDownLatch(1);
+   Watcher w = ev -> {
+     if (triggered.getCount() == 0) {
+       fail("already triggered once!");
+     }
+     triggered.countDown();
+   };
+   AutoScalingConfig current = cloudManager.getDistribStateManager().getAutoScalingConfig(w);
+   assertEquals(autoScalingConfig, current);
+
+   // replace the cluster preferences with a single one and push the change
+   Preference maximizeFreeDisk = new Preference(Collections.singletonMap("maximize", "freedisk"));
+   AutoScalingConfig updated =
+       current.withPolicy(current.getPolicy().withClusterPreferences(Collections.singletonList(maximizeFreeDisk)));
+   setAutoScalingConfig(updated);
+   assertTrue("Watch should be triggered on update!", triggered.await(10, TimeUnit.SECONDS));
+   AutoScalingConfig readBack = cloudManager.getDistribStateManager().getAutoScalingConfig(null);
+   assertEquals(updated, readBack);
+
+   // restore
+   setAutoScalingConfig(autoScalingConfig);
+   readBack = cloudManager.getDistribStateManager().getAutoScalingConfig(null);
+   assertEquals(autoScalingConfig, readBack);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
new file mode 100644
index 0000000..719bb7b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimComputePlanAction.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.autoscaling.ActionContext;
+import org.apache.solr.cloud.autoscaling.ComputePlanAction;
+import org.apache.solr.cloud.autoscaling.ScheduledTriggers;
+import org.apache.solr.cloud.autoscaling.TriggerAction;
+import org.apache.solr.cloud.autoscaling.TriggerEvent;
+import org.apache.solr.cloud.autoscaling.TriggerValidationException;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.LogLevel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOVEREPLICA;
+
+/**
+ * Test for {@link ComputePlanAction}
+ */
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;org.apache.solr.client.solrj.cloud.autoscaling=DEBUG;")
+public class TestSimComputePlanAction extends SimSolrCloudTestCase {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ // the fields below are written by AssertingTriggerAction.process() and read by the test methods
+ private static final AtomicBoolean fired = new AtomicBoolean(false); // flipped once per test by the action; reset in init()
+ private static final int NODE_COUNT = 1;
+ private static CountDownLatch triggerFiredLatch = new CountDownLatch(1); // counted down when the action records an event; replaced in init()
+ private static final AtomicReference<Map> actionContextPropsRef = new AtomicReference<>(); // context properties recorded by the action; cleared in init()
+ private static final AtomicReference<TriggerEvent> eventRef = new AtomicReference<>(); // event recorded by the action
+
+ /** Configures the shared cluster with {@code NODE_COUNT} node(s) and the "simTime:50" time source. */
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+   final TimeSource simTime = TimeSource.get("simTime:50");
+   configureCluster(NODE_COUNT, simTime);
+ }
+
+ /**
+  * Resets the trigger-tracking state shared with {@code AssertingTriggerAction} and
+  * installs the cluster policy and cluster preferences each test starts from, then
+  * sleeps for the default cooldown period on the cluster's time source.
+  */
+ @Before
+ public void init() throws Exception {
+
+   fired.set(false);
+   triggerFiredLatch = new CountDownLatch(1);
+   actionContextPropsRef.set(null);
+
+   String setClusterPolicyCommand = "{" +
+       " 'set-cluster-policy': [" +
+       " {'cores':'<10', 'node':'#ANY'}," +
+       " {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}," +
+       " {'nodeRole':'overseer', 'replica':0}" +
+       " ]" +
+       "}";
+   SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+   SolrResponse rsp = cluster.request(req);
+   NamedList<Object> response = rsp.getResponse();
+   // expected value goes first so a failure reports expected/actual the right way round
+   assertEquals("success", response.get("result").toString());
+
+   String setClusterPreferencesCommand = "{" +
+       "'set-cluster-preferences': [" +
+       "{'minimize': 'cores'}," +
+       "{'maximize': 'freedisk','precision': 100}]" +
+       "}";
+   req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPreferencesCommand);
+   rsp = cluster.request(req);
+   response = rsp.getResponse();
+   assertEquals("success", response.get("result").toString());
+   cluster.getTimeSource().sleep(TimeUnit.SECONDS.toMillis(ScheduledTriggers.DEFAULT_COOLDOWN_PERIOD_SECONDS));
+ }
+
+ /** Logs node values, live nodes and the state of every collection after each test. */
+ @After
+ public void printState() throws Exception {
+   log.info("-------------_ FINAL STATE --------------");
+   final String nodeValuesJson = Utils.toJSONString(cluster.getSimNodeStateProvider().simGetAllNodeValues());
+   log.info("* Node values: " + nodeValuesJson);
+   log.info("* Live nodes: " + cluster.getClusterStateProvider().getLiveNodes());
+   final ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+   for (String collectionName : cluster.getSimClusterStateProvider().simListCollections()) {
+     final DocCollection collectionState = clusterState.getCollection(collectionName);
+     log.info("* Collection " + collectionName + " state: " + collectionState);
+   }
+ }
+
+ /**
+  * Removes a node that hosts a replica of a 1x2 collection and checks that the
+  * nodeLost trigger makes {@link ComputePlanAction} compute exactly one MOVEREPLICA
+  * operation for that node's first replica.
+  */
+ @Test
+ public void testNodeLost() throws Exception {
+   // let's start a node so that we have at least two
+   String node = cluster.simAddNode();
+   // only events that mention this node are recorded by AssertingTriggerAction
+   AssertingTriggerAction.expectedNode = node;
+
+   SolrClient solrClient = cluster.simGetSolrClient();
+   String setTriggerCommand = "{" +
+       "'set-trigger' : {" +
+       "'name' : 'node_lost_trigger'," +
+       "'event' : 'nodeLost'," +
+       "'waitFor' : '7s'," +
+       "'enabled' : true," +
+       "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+       "{'name':'test','class':'" + TestSimComputePlanAction.AssertingTriggerAction.class.getName() + "'}]" +
+       "}}";
+   SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+   NamedList<Object> response = solrClient.request(req);
+   // expected value goes first so a failure reports expected/actual the right way round
+   assertEquals("success", response.get("result").toString());
+
+   CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeLost",
+       "conf",1, 2);
+   create.process(solrClient);
+
+   CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+       "testNodeLost", CloudTestUtils.clusterShape(1, 2, false, true));
+
+   ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+   log.debug("-- cluster state: {}", clusterState);
+   DocCollection collection = clusterState.getCollection("testNodeLost");
+   List<Replica> replicas = collection.getReplicas(node);
+   assertNotNull(replicas);
+   assertFalse(replicas.isEmpty());
+
+   // start another node because when the other node goes away, the cluster policy requires only
+   // 1 replica per node and none on the overseer
+   String node2 = cluster.simAddNode();
+   assertTrue(node2 + " is not live yet", cluster.getClusterStateProvider().getClusterState().liveNodesContain(node2) );
+
+   // stop the original node
+   cluster.simRemoveNode(node, false);
+   log.info("Stopped_node : {}", node);
+
+   assertTrue("Trigger was not fired even after 10 seconds", triggerFiredLatch.await(10, TimeUnit.SECONDS));
+   assertTrue(fired.get());
+   Map context = actionContextPropsRef.get();
+   assertNotNull(context);
+   List<SolrRequest> operations = (List<SolrRequest>) context.get("operations");
+   assertNotNull("The operations computed by ComputePlanAction should not be null , " + eventRef.get(), operations);
+   assertEquals("ComputePlanAction should have computed exactly 1 operation", 1, operations.size());
+   SolrRequest solrRequest = operations.get(0);
+   SolrParams params = solrRequest.getParams();
+   assertEquals("Expected MOVEREPLICA action after losing node", MOVEREPLICA, CollectionParams.CollectionAction.get(params.get("action")));
+   String replicaToBeMoved = params.get("replica");
+   assertEquals("Unexpected replica in computed operation", replicas.get(0).getName(), replicaToBeMoved);
+
+   // shutdown the extra node that we had started
+   cluster.simRemoveNode(node2, false);
+ }
+
+ /**
+  * Removes a node that hosts two replicas of a 2x3 collection and checks that the
+  * nodeLost trigger makes {@link ComputePlanAction} compute exactly two MOVEREPLICA
+  * operations, each for one of that node's replicas.
+  */
+ @Test
+ public void testNodeWithMultipleReplicasLost() throws Exception {
+   // record the first event regardless of which node it refers to
+   AssertingTriggerAction.expectedNode = null;
+
+   // start 3 more nodes
+   cluster.simAddNode();
+   cluster.simAddNode();
+   cluster.simAddNode();
+
+   SolrClient solrClient = cluster.simGetSolrClient();
+   String setTriggerCommand = "{" +
+       "'set-trigger' : {" +
+       "'name' : 'node_lost_trigger'," +
+       "'event' : 'nodeLost'," +
+       "'waitFor' : '1s'," +
+       "'enabled' : true," +
+       "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+       "{'name':'test','class':'" + AssertingTriggerAction.class.getName() + "'}]" +
+       "}}";
+   SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+   NamedList<Object> response = solrClient.request(req);
+   // expected value goes first so a failure reports expected/actual the right way round
+   assertEquals("success", response.get("result").toString());
+
+   CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeWithMultipleReplicasLost",
+       "conf",2, 3);
+// create.setMaxShardsPerNode(2);
+   create.process(solrClient);
+
+   CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+       "testNodeWithMultipleReplicasLost", CloudTestUtils.clusterShape(2, 3, false, true));
+
+   ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+   log.debug("-- cluster state: {}", clusterState);
+   DocCollection docCollection = clusterState.getCollection("testNodeWithMultipleReplicasLost");
+
+   // lets find a node with exactly 2 replicas and remove it
+   String stoppedNodeName = null;
+   List<Replica> replicasToBeMoved = null;
+   for (String node : cluster.getClusterStateProvider().getLiveNodes()) {
+     List<Replica> replicas = docCollection.getReplicas(node);
+     if (replicas != null && replicas.size() == 2) {
+       stoppedNodeName = node;
+       replicasToBeMoved = replicas;
+       cluster.simRemoveNode(node, false);
+       break;
+     }
+   }
+   assertNotNull(stoppedNodeName);
+
+   assertTrue("Trigger was not fired even after 5 seconds", triggerFiredLatch.await(5, TimeUnit.SECONDS));
+   assertTrue(fired.get());
+
+   TriggerEvent triggerEvent = eventRef.get();
+   assertNotNull(triggerEvent);
+   assertEquals(TriggerEventType.NODELOST, triggerEvent.getEventType());
+   // TODO assertEquals(stoppedNodeName, triggerEvent.getProperty(TriggerEvent.NODE_NAME));
+
+   Map context = actionContextPropsRef.get();
+   assertNotNull(context);
+   List<SolrRequest> operations = (List<SolrRequest>) context.get("operations");
+   assertNotNull("The operations computed by ComputePlanAction should not be null " + actionContextPropsRef.get() + "\nevent: " + eventRef.get(), operations);
+   operations.forEach(solrRequest -> log.info(solrRequest.getParams().toString()));
+   assertEquals("ComputePlanAction should have computed exactly 2 operation", 2, operations.size());
+
+   for (SolrRequest solrRequest : operations) {
+     SolrParams params = solrRequest.getParams();
+     assertEquals("Expected MOVEREPLICA action after losing node", MOVEREPLICA, CollectionParams.CollectionAction.get(params.get("action")));
+     String moved = params.get("replica");
+     // every computed move must target one of the replicas of the removed node
+     assertTrue(replicasToBeMoved.stream().anyMatch(replica -> replica.getName().equals(moved)));
+   }
+ }
+
+ /**
+  * Creates a 1x4 collection under a relaxed policy, tightens the policy, adds a node and
+  * checks that the nodeAdded trigger makes {@link ComputePlanAction} compute exactly one
+  * MOVEREPLICA operation whose target is the new node.
+  */
+ @Test
+ //17-Aug-2018 commented @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
+ public void testNodeAdded() throws Exception {
+   // record the first event regardless of which node it refers to
+   AssertingTriggerAction.expectedNode = null;
+   SolrClient solrClient = cluster.simGetSolrClient();
+   String setTriggerCommand = "{" +
+       "'set-trigger' : {" +
+       "'name' : 'node_added_trigger'," +
+       "'event' : 'nodeAdded'," +
+       "'waitFor' : '1s'," +
+       "'enabled' : true," +
+       "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+       "{'name':'test','class':'" + TestSimComputePlanAction.AssertingTriggerAction.class.getName() + "'}]" +
+       "}}";
+   SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+   NamedList<Object> response = solrClient.request(req);
+   // expected value goes first so a failure reports expected/actual the right way round
+   assertEquals("success", response.get("result").toString());
+
+   // the default policy limits 1 replica per node, we need more right now
+   String setClusterPolicyCommand = "{" +
+       " 'set-cluster-policy': [" +
+       " {'cores':'<10', 'node':'#ANY'}," +
+       " {'replica':'<5', 'shard': '#EACH', 'node': '#ANY'}," +
+       " {'nodeRole':'overseer', 'replica':0}" +
+       " ]" +
+       "}";
+   req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+   response = solrClient.request(req);
+   assertEquals("success", response.get("result").toString());
+
+   CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeAdded",
+       "conf",1, 4);
+   create.process(solrClient);
+
+   CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+       "testNodeAdded", (liveNodes, collectionState) -> collectionState.getReplicas().stream().allMatch(replica -> replica.isActive(liveNodes)));
+
+   // tighten the policy to fewer than 3 replicas of a shard per node ('<3')
+   setClusterPolicyCommand = "{" +
+       " 'set-cluster-policy': [" +
+       " {'cores':'<10', 'node':'#ANY'}," +
+       " {'replica':'<3', 'shard': '#EACH', 'node': '#ANY'}," +
+       " {'nodeRole':'overseer', 'replica':0}" +
+       " ]" +
+       "}";
+   req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
+   response = solrClient.request(req);
+   assertEquals("success", response.get("result").toString());
+
+   // start a node so that the 'violation' created by the previous policy update is fixed
+   String newNode = cluster.simAddNode();
+   assertTrue("Trigger was not fired even after 5 seconds", triggerFiredLatch.await(5, TimeUnit.SECONDS));
+   assertTrue(fired.get());
+   Map context = actionContextPropsRef.get();
+   assertNotNull(context);
+   log.info("Node values: " + Utils.toJSONString(cluster.getSimNodeStateProvider().simGetAllNodeValues()));
+   log.info("Live nodes: " + cluster.getClusterStateProvider().getLiveNodes() + ", collection state: " + cluster.getClusterStateProvider().getClusterState().getCollection("testNodeAdded"));
+   List<SolrRequest> operations = (List<SolrRequest>) context.get("operations");
+   assertNotNull("The operations computed by ComputePlanAction should not be null" + context, operations);
+   assertEquals("ComputePlanAction should have computed exactly 1 operation, but was: " + operations, 1, operations.size());
+   SolrRequest request = operations.get(0);
+   SolrParams params = request.getParams();
+   assertEquals("Expected MOVEREPLICA action after adding node", MOVEREPLICA, CollectionParams.CollectionAction.get(params.get("action")));
+   String nodeAdded = params.get("targetNode");
+   assertEquals("Unexpected node in computed operation", newNode, nodeAdded);
+ }
+
+ /**
+  * Trigger action that records the first processed event and its action context
+  * properties into the enclosing test's static fields and releases the latch.
+  * When {@code expectedNode} is set, events whose {@link TriggerEvent#NODE_NAMES}
+  * property does not contain it are ignored.
+  */
+ public static class AssertingTriggerAction implements TriggerAction {
+   static String expectedNode;
+
+   @Override
+   public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, Map<String, Object> properties) throws TriggerValidationException {
+     // nothing to configure
+   }
+
+   @Override
+   public void init() {
+     // nothing to initialize
+   }
+
+   @Override
+   public String getName() {
+     return null;
+   }
+
+   @Override
+   public void process(TriggerEvent event, ActionContext context) {
+     if (expectedNode != null) {
+       Collection nodes = (Collection) event.getProperty(TriggerEvent.NODE_NAMES);
+       boolean mentionsExpectedNode = nodes != null && nodes.contains(expectedNode);
+       if (!mentionsExpectedNode) {
+         return; //this is not the event we are looking for
+       }
+     }
+     // only the first matching event per test is recorded
+     if (!fired.compareAndSet(false, true)) {
+       return;
+     }
+     eventRef.set(event);
+     actionContextPropsRef.set(context.getProperties());
+     triggerFiredLatch.countDown();
+   }
+
+   @Override
+   public void close() throws IOException {
+     // nothing to release
+   }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java
new file mode 100644
index 0000000..b161a15
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimDistribStateManager.java
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.NoSuchElementException;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
+import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
+import org.apache.solr.client.solrj.cloud.DistribStateManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.NotEmptyException;
+import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
+import org.apache.solr.client.solrj.impl.ZkDistribStateManager;
+import org.apache.solr.cloud.ZkTestServer;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This test compares a ZK-based {@link DistribStateManager} to the simulated one.
+ */
+public class TestSimDistribStateManager extends SolrTestCaseJ4 {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ private DistribStateManager stateManager;
+ private ZkTestServer zkTestServer;
+ private SolrZkClient solrZkClient;
+ private boolean simulated;
+ private SimDistribStateManager.Node root;
+
+ /**
+  * Randomly selects the implementation under test (simulated or ZooKeeper-backed), prepares its
+  * backing store and creates the initial state manager.
+  */
+ @Before
+ public void setup() throws Exception {
+   simulated = random().nextBoolean();
+   if (!simulated) {
+     // ZooKeeper-backed run: start a test server in a fresh temp directory
+     zkTestServer = new ZkTestServer(createTempDir("zkDir").toString());
+     zkTestServer.run();
+   } else {
+     // simulated run: all managers created by this test share this root node
+     root = SimDistribStateManager.createNewRootNode();
+   }
+   reInit();
+ }
+
+ /**
+  * Closes the current state manager (if any) and creates a fresh one of the selected kind.
+  * In the ZooKeeper-backed case the previous client is closed and a new one is opened.
+  */
+ private void reInit() throws Exception {
+   if (stateManager != null) {
+     stateManager.close();
+   }
+   if (simulated) {
+     stateManager = new SimDistribStateManager(root);
+   } else {
+     if (solrZkClient != null) {
+       solrZkClient.close();
+     }
+     solrZkClient = new SolrZkClient(zkTestServer.getZkHost(), 30000);
+     stateManager = new ZkDistribStateManager(solrZkClient);
+   }
+   // parameterized logging instead of string concatenation
+   log.info("Using {}", stateManager.getClass().getName());
+ }
+
+ /**
+  * Creates an additional state manager of the same kind as the one under test. A ZooKeeper-backed
+  * manager gets its own new client; a simulated one shares the common root node.
+  */
+ private DistribStateManager createDistribStateManager() {
+   if (!simulated) {
+     return new ZkDistribStateManager(new SolrZkClient(zkTestServer.getZkHost(), 30000));
+   }
+   return new SimDistribStateManager(root);
+ }
+
+ /**
+  * Closes a manager obtained from {@link #createDistribStateManager()} and, when it is
+  * ZooKeeper-backed, the client it was created with.
+  *
+  * @param mgr the manager to release
+  */
+ private void destroyDistribStateManager(DistribStateManager mgr) throws Exception {
+   try {
+     mgr.close();
+   } finally {
+     // release the client even if closing the manager failed, so the connection is not leaked
+     if (mgr instanceof ZkDistribStateManager) {
+       ((ZkDistribStateManager) mgr).getZkClient().close();
+     }
+   }
+ }
+
+ /**
+  * Releases the ZooKeeper client, the test ZooKeeper server and the state manager, in that order.
+  * Each step runs even when an earlier one throws, so a failing close cannot leave the server
+  * running or the fields pointing at dead resources.
+  */
+ @After
+ public void teardown() throws Exception {
+   try {
+     if (solrZkClient != null) {
+       solrZkClient.close();
+     }
+   } finally {
+     solrZkClient = null;
+     try {
+       if (zkTestServer != null) {
+         zkTestServer.shutdown();
+       }
+     } finally {
+       zkTestServer = null;
+       try {
+         if (stateManager != null) {
+           stateManager.close();
+         }
+       } finally {
+         stateManager = null;
+       }
+     }
+   }
+ }
+
+ /** Checks {@code hasData} before and after nodes are created, and that creation needs an existing parent. */
+ @Test
+ public void testHasData() throws Exception {
+   final String fooPath = "/hasData/foo";
+   final String barPath = "/hasData/bar";
+
+   assertFalse(stateManager.hasData(fooPath));
+   assertFalse(stateManager.hasData(barPath));
+
+   // creating a child under a missing parent must be rejected
+   try {
+     stateManager.createData(fooPath, new byte[0], CreateMode.PERSISTENT);
+     fail("should have failed (parent /hasData doesn't exist)");
+   } catch (NoSuchElementException expected) {
+     // expected
+   }
+
+   stateManager.makePath("/hasData");
+   stateManager.createData(fooPath, new byte[0], CreateMode.PERSISTENT);
+   stateManager.createData(barPath, new byte[0], CreateMode.PERSISTENT);
+
+   assertTrue(stateManager.hasData(fooPath));
+   assertTrue(stateManager.hasData(barPath));
+ }
+
+ /** Exercises single-node removal (including the non-empty case) and recursive removal. */
+ @Test
+ public void testRemoveData() throws Exception {
+   final String top = "/removeData";
+   final String foo = "/removeData/foo";
+   final String fooBar = "/removeData/foo/bar";
+   final String baz = "/removeData/baz";
+   final String bazOne = "/removeData/baz/1";
+   final String bazOneTwo = "/removeData/baz/1/2";
+   final String bazDeep = "/removeData/baz/1/2/3";
+
+   assertFalse(stateManager.hasData(foo));
+   assertFalse(stateManager.hasData(fooBar));
+   assertFalse(stateManager.hasData(baz));
+   assertFalse(stateManager.hasData(bazDeep));
+
+   stateManager.makePath(fooBar);
+   stateManager.makePath(bazDeep);
+   assertTrue(stateManager.hasData(foo));
+   assertTrue(stateManager.hasData(fooBar));
+   assertTrue(stateManager.hasData(bazDeep));
+
+   // a node that still has children cannot be removed directly
+   try {
+     stateManager.removeData(foo, -1);
+     fail("should have failed (node has children)");
+   } catch (NotEmptyException expected) {
+     // expected
+   }
+   stateManager.removeData(fooBar, -1);
+   stateManager.removeData(foo, -1);
+
+   // test recursive listing and removal
+   stateManager.removeRecursively(bazOne, false, false);
+   assertFalse(stateManager.hasData(bazOneTwo));
+   assertTrue(stateManager.hasData(bazOne));
+
+   // should silently ignore
+   stateManager.removeRecursively(bazOneTwo, true, true);
+   stateManager.removeRecursively(bazOne, false, true);
+   assertFalse(stateManager.hasData(bazOne));
+
+   try {
+     stateManager.removeRecursively(bazOne, false, true);
+     fail("should throw exception - missing path");
+   } catch (NoSuchElementException expected) {
+     // expected
+   }
+
+   stateManager.removeRecursively(top, true, true);
+   assertFalse(stateManager.hasData(top));
+ }
+
+ /** Checks child listing at each step of building a small tree, plus the missing-path and duplicate cases. */
+ @Test
+ public void testListData() throws Exception {
+   final String top = "/listData";
+   final String foo = "/listData/foo";
+   final String fooBar = "/listData/foo/bar";
+   final String fooBaz = "/listData/foo/baz";
+
+   assertFalse(stateManager.hasData(foo));
+   assertFalse(stateManager.hasData(fooBar));
+
+   // neither creating under nor listing a missing node may succeed
+   try {
+     stateManager.createData(fooBar, new byte[0], CreateMode.PERSISTENT);
+     fail("should not succeed");
+   } catch (NoSuchElementException expected) {
+     // expected
+   }
+   try {
+     stateManager.listData(foo);
+     fail("should not succeed");
+   } catch (NoSuchElementException expected) {
+     // expected
+   }
+
+   stateManager.makePath(top);
+   List<String> emptyKids = stateManager.listData(top);
+   assertEquals(0, emptyKids.size());
+
+   stateManager.makePath(foo);
+   List<String> topKids = stateManager.listData(top);
+   assertEquals(1, topKids.size());
+   assertEquals("foo", topKids.get(0));
+
+   stateManager.createData(fooBar, new byte[0], CreateMode.PERSISTENT);
+   stateManager.createData(fooBaz, new byte[0], CreateMode.PERSISTENT);
+   List<String> fooKids = stateManager.listData(foo);
+   assertEquals(2, fooKids.size());
+   assertTrue(fooKids.contains("bar"));
+   assertTrue(fooKids.contains("baz"));
+
+   // creating an already existing node must be rejected
+   try {
+     stateManager.createData(fooBar, new byte[0], CreateMode.PERSISTENT);
+     fail("should not succeed");
+   } catch (AlreadyExistsException expected) {
+     // expected
+   }
+ }
+
+ /** Payload written when the test nodes are created. */
+ static final byte[] firstData = {(byte) 0xca, (byte) 0xfe, (byte) 0xba, (byte) 0xbe};
+
+ /** Payload used for updates; the bytes of {@link #firstData} in reverse order. */
+ static final byte[] secondData = {(byte) 0xbe, (byte) 0xba, (byte) 0xfe, (byte) 0xca};
+
+ /**
+  * Creates persistent, ephemeral and sequential nodes, checks the generated sequential names
+  * (including the numbering after a removed node) and verifies that only the ephemeral nodes are
+  * gone after the state manager is re-created.
+  */
+ @Test
+ public void testCreateMode() throws Exception {
+   final String persistent = "/createMode/persistent";
+   final String ephemeral = "/createMode/ephemeral";
+   final String persistentSeqDir = "/createMode/persistent_seq";
+   final String ephemeralSeqDir = "/createMode/ephemeral_seq";
+   final String persistentSeqNode = "/createMode/persistent_seq/data";
+   final String ephemeralSeqNode = "/createMode/ephemeral_seq/data";
+   final String suffix10 = String.format(Locale.ROOT, "%010d", 10);
+   final String suffix11 = String.format(Locale.ROOT, "%010d", 11);
+
+   stateManager.makePath("/createMode");
+   stateManager.createData(persistent, firstData, CreateMode.PERSISTENT);
+   stateManager.createData(persistentSeqDir, firstData, CreateMode.PERSISTENT);
+   for (int n = 0; n < 10; n++) {
+     stateManager.createData(persistentSeqNode, firstData, CreateMode.PERSISTENT_SEQUENTIAL);
+   }
+   // check what happens with gaps
+   stateManager.createData(persistentSeqNode, firstData, CreateMode.PERSISTENT_SEQUENTIAL);
+   stateManager.removeData(persistentSeqNode + suffix10, -1);
+   stateManager.createData(persistentSeqNode, firstData, CreateMode.PERSISTENT_SEQUENTIAL);
+
+   stateManager.createData(ephemeral, firstData, CreateMode.EPHEMERAL);
+   stateManager.createData(ephemeralSeqDir, firstData, CreateMode.PERSISTENT);
+   for (int n = 0; n < 10; n++) {
+     stateManager.createData(ephemeralSeqNode, firstData, CreateMode.EPHEMERAL_SEQUENTIAL);
+   }
+
+   assertTrue(stateManager.hasData("/createMode"));
+   assertTrue(stateManager.hasData(persistent));
+   assertTrue(stateManager.hasData(ephemeral));
+   List<String> persistentKids = stateManager.listData(persistentSeqDir);
+   assertEquals(11, persistentKids.size());
+   List<String> ephemeralKids = stateManager.listData(ephemeralSeqDir);
+   assertEquals(10, ephemeralKids.size());
+   for (int n = 0; n < 10; n++) {
+     assertTrue(stateManager.hasData(persistentSeqNode + String.format(Locale.ROOT, "%010d", n)));
+   }
+   // the removed sequence number is not reused
+   assertFalse(stateManager.hasData(persistentSeqNode + suffix10));
+   assertTrue(stateManager.hasData(persistentSeqNode + suffix11));
+
+   for (int n = 0; n < 10; n++) {
+     assertTrue(stateManager.hasData(ephemeralSeqNode + String.format(Locale.ROOT, "%010d", n)));
+   }
+
+   // check that ephemeral nodes disappear on disconnect
+   reInit();
+   assertTrue(stateManager.hasData(persistent));
+   for (int n = 0; n < 10; n++) {
+     assertTrue(stateManager.hasData(persistentSeqNode + String.format(Locale.ROOT, "%010d", n)));
+   }
+   assertTrue(stateManager.hasData(persistentSeqNode + suffix11));
+
+   assertFalse(stateManager.hasData(ephemeral));
+   assertTrue(stateManager.hasData(ephemeralSeqDir));
+   List<String> remainingEphemeralKids = stateManager.listData(ephemeralSeqDir);
+   assertEquals(0, remainingEphemeralKids.size());
+ }
+
+ /**
+  * A {@link Watcher} that expects to be notified at most once. The first notification stores the
+  * event and releases {@link #triggered}; any later notification calls {@code fail}.
+  */
+ static class OnceWatcher implements Watcher {
+   CountDownLatch triggered = new CountDownLatch(1);
+   WatchedEvent event;
+
+   @Override
+   public void process(WatchedEvent event) {
+     if (triggered.getCount() == 0) {
+       fail("Watch was already triggered once!");
+     }
+     // store the event BEFORE releasing the latch, so that a thread returning from
+     // triggered.await() is guaranteed to observe it
+     this.event = event;
+     triggered.countDown();
+   }
+ }
+
+ /**
+  * Exercises versioned reads, updates and removals, and checks that a watch set through
+  * {@code getData} fires on update, on disappearance of an ephemeral node and on removal, but
+  * not when the operation is rejected because of a wrong version.
+  */
+ @Test
+ public void testGetSetRemoveData() throws Exception {
+   stateManager.makePath("/getData");
+   stateManager.createData("/getData/persistentData", firstData, CreateMode.PERSISTENT);
+   OnceWatcher nodeWatcher = new OnceWatcher();
+   VersionedData vd = stateManager.getData("/getData/persistentData", nodeWatcher);
+   assertNotNull(vd);
+   assertEquals(0, vd.getVersion());
+   assertTrue(Arrays.equals(firstData, vd.getData()));
+
+   // update data, test versioning
+   try {
+     stateManager.setData("/getData/persistentData", secondData, 1);
+     fail("should have failed");
+   } catch (BadVersionException e) {
+     // expected
+   }
+   // watch should not have fired
+   assertEquals(1, nodeWatcher.triggered.getCount());
+
+   stateManager.setData("/getData/persistentData", secondData, 0);
+   if (!nodeWatcher.triggered.await(5, TimeUnit.SECONDS)) {
+     fail("Node watch should have fired!");
+   }
+   // watch should not fire now because it needs to be reset
+   stateManager.setData("/getData/persistentData", secondData, -1);
+
+   // create ephemeral node using another ZK connection
+   DistribStateManager ephemeralMgr = createDistribStateManager();
+   try {
+     ephemeralMgr.createData("/getData/ephemeralData", firstData, CreateMode.EPHEMERAL);
+
+     nodeWatcher = new OnceWatcher();
+     vd = stateManager.getData("/getData/ephemeralData", nodeWatcher);
+   } finally {
+     // destroying the second manager is what makes the ephemeral node disappear; doing it in
+     // finally also keeps the extra connection from leaking if the calls above throw
+     destroyDistribStateManager(ephemeralMgr);
+   }
+   if (!nodeWatcher.triggered.await(5, TimeUnit.SECONDS)) {
+     fail("Node watch should have fired!");
+   }
+   assertTrue(stateManager.hasData("/getData/persistentData"));
+   assertFalse(stateManager.hasData("/getData/ephemeralData"));
+
+   nodeWatcher = new OnceWatcher();
+   vd = stateManager.getData("/getData/persistentData", nodeWatcher);
+   // try wrong version
+   try {
+     stateManager.removeData("/getData/persistentData", vd.getVersion() - 1);
+     fail("should have failed");
+   } catch (BadVersionException e) {
+     // expected
+   }
+   // watch should not have fired
+   assertEquals(1, nodeWatcher.triggered.getCount());
+
+   stateManager.removeData("/getData/persistentData", vd.getVersion());
+   if (!nodeWatcher.triggered.await(5, TimeUnit.SECONDS)) {
+     fail("Node watch should have fired!");
+   }
+ }
+
+ @Test
+ public void testMulti() throws Exception {
+ // TODO: placeholder - this test has no body yet, so it verifies nothing and always passes.
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
new file mode 100644
index 0000000..d0d08fd
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExecutePlanAction.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+import com.google.common.collect.Lists;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.autoscaling.ActionContext;
+import org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest;
+import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
+import org.apache.solr.cloud.autoscaling.NodeLostTrigger;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.util.LogLevel;
+import org.apache.solr.common.util.TimeSource;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test for {@link ExecutePlanAction}
+ */
+@LogLevel("org.apache.solr.cloud=DEBUG")
+public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ private static final int NODE_COUNT = 2;
+
+ /** Configures the simulated cluster with {@link #NODE_COUNT} nodes and the {@code simTime:50} time source. */
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+   final TimeSource simulatedTime = TimeSource.get("simTime:50");
+   configureCluster(NODE_COUNT, simulatedTime);
+ }
+
+ /** Logs the node values, live nodes and the state of every collection after each test. */
+ @After
+ public void printState() throws Exception {
+   log.info("-------------_ FINAL STATE --------------");
+   final String nodeValues = Utils.toJSONString(cluster.getSimNodeStateProvider().simGetAllNodeValues());
+   log.info("* Node values: " + nodeValues);
+   log.info("* Live nodes: " + cluster.getClusterStateProvider().getLiveNodes());
+   final ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+   for (String collection : cluster.getSimClusterStateProvider().simListCollections()) {
+     log.info("* Collection " + collection + " state: " + clusterState.getCollection(collection));
+   }
+ }
+
+ /**
+  * Feeds {@link ExecutePlanAction} a hand-built list of two operations (a MOVEREPLICA and a mock
+  * request) together with a synthetic node-lost event, then checks that two responses were
+  * recorded in the action context and that the first one reports success.
+  */
+ @Test
+ @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
+ public void testExecute() throws Exception {
+   SolrClient solrClient = cluster.simGetSolrClient();
+   String collectionName = "testExecute";
+   CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+       "conf", 1, 2);
+   create.setMaxShardsPerNode(1);
+   create.process(solrClient);
+
+   log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
+       CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
+
+   String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+   ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+   DocCollection docCollection = clusterState.getCollection(collectionName);
+   List<Replica> replicas = docCollection.getReplicas(sourceNodeName);
+   assertNotNull(replicas);
+   assertFalse(replicas.isEmpty());
+
+   List<String> otherNodes = cluster.getClusterStateProvider().getLiveNodes().stream()
+       .filter(node -> !node.equals(sourceNodeName)).collect(Collectors.toList());
+   assertFalse(otherNodes.isEmpty());
+   String survivor = otherNodes.get(0);
+
+   try (ExecutePlanAction action = new ExecutePlanAction()) {
+     action.configure(cluster.getLoader(), cluster, Collections.singletonMap("name", "execute_plan"));
+
+     // used to signal if we found that ExecutePlanAction did in fact create the right znode before executing the operation
+     AtomicBoolean znodeCreated = new AtomicBoolean(false);
+
+     CollectionAdminRequest.AsyncCollectionAdminRequest moveReplica = new CollectionAdminRequest.MoveReplica(collectionName, replicas.get(0).getName(), survivor);
+     CollectionAdminRequest.AsyncCollectionAdminRequest mockRequest = new CollectionAdminRequest.AsyncCollectionAdminRequest(CollectionParams.CollectionAction.OVERSEERSTATUS) {
+       @Override
+       public void setAsyncId(String asyncId) {
+         super.setAsyncId(asyncId);
+         String parentPath = ZkStateReader.SOLR_AUTOSCALING_TRIGGER_STATE_PATH + "/xyz/execute_plan";
+         try {
+           if (cluster.getDistribStateManager().hasData(parentPath)) {
+             List<String> children = cluster.getDistribStateManager().listData(parentPath);
+             if (!children.isEmpty()) {
+               String child = children.get(0);
+               VersionedData data = cluster.getDistribStateManager().getData(parentPath + "/" + child);
+               // wildcard map instead of a raw type: the entries are only read
+               Map<?, ?> m = (Map<?, ?>) Utils.fromJSON(data.getData());
+               if (m.containsKey("requestid")) {
+                 znodeCreated.set(m.get("requestid").equals(asyncId));
+               }
+             }
+           }
+         } catch (Exception e) {
+           throw new RuntimeException(e);
+         }
+       }
+     };
+     List<CollectionAdminRequest.AsyncCollectionAdminRequest> operations = Lists.asList(moveReplica, new CollectionAdminRequest.AsyncCollectionAdminRequest[]{mockRequest});
+     NodeLostTrigger.NodeLostEvent nodeLostEvent = new NodeLostTrigger.NodeLostEvent(TriggerEventType.NODELOST,
+         "mock_trigger_name", Collections.singletonList(TimeSource.CURRENT_TIME.getTimeNs()),
+         Collections.singletonList(sourceNodeName));
+     ActionContext actionContext = new ActionContext(cluster, null,
+         new HashMap<>(Collections.singletonMap("operations", operations)));
+     action.process(nodeLostEvent, actionContext);
+
+ //    assertTrue("ExecutePlanAction should have stored the requestid in ZK before executing the request", znodeCreated.get());
+     // the context stores untyped properties, so this cast cannot be checked by the compiler
+     @SuppressWarnings("unchecked")
+     List<NamedList<Object>> responses = (List<NamedList<Object>>) actionContext.getProperty("responses");
+     assertNotNull(responses);
+     assertEquals(2, responses.size());
+     NamedList<Object> response = responses.get(0);
+     assertNull(response.get("failure"));
+     assertNotNull(response.get("success"));
+   }
+
+   log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
+       CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
+ }
+
+ /**
+  * Registers a node-lost trigger with compute_plan and execute_plan actions, removes one of the
+  * nodes hosting the test collection and checks that the collection gets back to two replicas,
+  * both located on the surviving node.
+  */
+ @Test
+ public void testIntegration() throws Exception {
+   SolrClient solrClient = cluster.simGetSolrClient();
+
+   String setTriggerCommand = "{" +
+       "'set-trigger' : {" +
+       "'name' : 'node_lost_trigger'," +
+       "'event' : 'nodeLost'," +
+       "'waitFor' : '1s'," +
+       "'enabled' : true," +
+       "'actions' : [{'name':'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
+       "{'name':'execute_plan','class':'solr.ExecutePlanAction'}]" +
+       "}}";
+   SolrRequest req = AutoScalingHandlerTest.createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+   NamedList<Object> response = solrClient.request(req);
+   // expected value first, so that a failure message reads "expected:<success> but was:<...>"
+   assertEquals("success", response.get("result").toString());
+
+   String collectionName = "testIntegration";
+   CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+       "conf", 1, 2);
+   create.setMaxShardsPerNode(1);
+   create.process(solrClient);
+
+   CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
+       collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
+
+   String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode(random());
+   ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
+   DocCollection docCollection = clusterState.getCollection(collectionName);
+   List<Replica> replicas = docCollection.getReplicas(sourceNodeName);
+   assertNotNull(replicas);
+   assertFalse(replicas.isEmpty());
+
+   List<String> otherNodes = cluster.getClusterStateProvider().getLiveNodes().stream()
+       .filter(node -> !node.equals(sourceNodeName)).collect(Collectors.toList());
+   assertFalse(otherNodes.isEmpty());
+   String survivor = otherNodes.get(0);
+
+   cluster.simRemoveNode(sourceNodeName, false);
+
+   // sleep on the cluster's own time source before polling the cluster shape again
+   cluster.getTimeSource().sleep(3000);
+
+   CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of collection to be 2 again",
+       collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
+
+   clusterState = cluster.getClusterStateProvider().getClusterState();
+   docCollection = clusterState.getCollection(collectionName);
+   List<Replica> replicasOnSurvivor = docCollection.getReplicas(survivor);
+   assertNotNull(replicasOnSurvivor);
+   assertEquals(2, replicasOnSurvivor.size());
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java
new file mode 100644
index 0000000..25e36f3
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimGenericDistributedQueue.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud.autoscaling.sim;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.cloud.DistributedQueue;
+import org.apache.solr.client.solrj.cloud.DistribStateManager;
+
+/**
+ * Runs the tests inherited from {@link TestSimDistributedQueue} against a
+ * {@link GenericDistributedQueue} backed by a {@link SimDistribStateManager}.
+ */
+@LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-02-26
+public class TestSimGenericDistributedQueue extends TestSimDistributedQueue {
+  /** State manager shared by every queue created by this test instance. */
+  DistribStateManager stateManager = new SimDistribStateManager();
+
+  @Override
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
+  protected DistributedQueue makeDistributedQueue(String dqZNode) throws Exception {
+    return new GenericDistributedQueue(stateManager, dqZNode);
+  }
+
+  // overridden only to attach the BadApple annotation; the test logic lives in the superclass
+  @Override
+  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 09-Aug-2018
+  public void testDistributedQueue() throws Exception {
+    super.testDistributedQueue();
+  }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b949f57f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
new file mode 100644
index 0000000..decb585
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimLargeCluster.java
@@ -0,0 +1,727 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling.sim;
+
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
+import org.apache.solr.cloud.autoscaling.ActionContext;
+import org.apache.solr.cloud.autoscaling.ComputePlanAction;
+import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
+import org.apache.solr.cloud.autoscaling.SearchRateTrigger;
+import org.apache.solr.cloud.autoscaling.TriggerActionBase;
+import org.apache.solr.cloud.autoscaling.TriggerEvent;
+import org.apache.solr.cloud.autoscaling.TriggerListenerBase;
+import org.apache.solr.cloud.autoscaling.CapturedEvent;
+import org.apache.solr.cloud.autoscaling.TriggerValidationException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Pair;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.LogLevel;
+import org.apache.solr.util.TimeOut;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
+
+/**
+ * Autoscaling trigger tests (node lost, node added, search rate) run against a simulated cluster of NUM_NODES nodes.
+ */
+@TimeoutSuite(millis = 4 * 3600 * 1000)
+@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
+@ThreadLeakLingering(linger = 20000) // ComputePlanAction may take significant time to complete
+//05-Jul-2018 @LuceneTestCase.BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12075")
+public class TestSimLargeCluster extends SimSolrCloudTestCase {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public static final int SPEED = 50; // appended to "simTime:" when setupCluster builds the TimeSource; also divides real-time latch timeouts
+
+ public static final int NUM_NODES = 100; // node count passed to configureCluster; collection sizes are derived from it
+
+ static Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>(); // events captured by TestTriggerListener, keyed by listener config name
+ static AtomicInteger triggerFinishedCount = new AtomicInteger(); // incremented by FinishTriggerAction
+ static AtomicInteger triggerStartedCount = new AtomicInteger(); // incremented by StartTriggerAction
+ static CountDownLatch triggerStartedLatch; // counted down by StartTriggerAction; recreated in setupTest
+ static CountDownLatch triggerFinishedLatch; // counted down by FinishTriggerAction; recreated in setupTest
+ static int waitForSeconds; // value used for the triggers' 'waitFor' setting (with an 's' suffix); set to 5 in setupTest
+
+ /** Configures the simulated cluster once per class: NUM_NODES nodes on a "simTime:" + SPEED time source. */
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ final TimeSource simulatedTime = TimeSource.get("simTime:" + SPEED);
+ configureCluster(NUM_NODES, simulatedTime);
+ }
+
+ /**
+ * Per-test reset: restores the shared counters, latches and captured events,
+ * suspends the '.scheduled_maintenance' and '.auto_add_replicas' triggers, and
+ * creates the system collection when the simulated cluster does not list it yet.
+ */
+ @Before
+ public void setupTest() throws Exception {
+ waitForSeconds = 5;
+ triggerStartedCount.set(0);
+ triggerFinishedCount.set(0);
+ triggerStartedLatch = new CountDownLatch(1);
+ triggerFinishedLatch = new CountDownLatch(1);
+ listenerEvents.clear();
+ // disable .scheduled_maintenance and .auto_add_replicas
+ SolrClient solrClient = cluster.simGetSolrClient();
+ suspendTriggerIfExists(solrClient, ".scheduled_maintenance");
+ suspendTriggerIfExists(solrClient, ".auto_add_replicas");
+
+ // do this in advance if missing
+ if (!cluster.getSimClusterStateProvider().simListCollections().contains(CollectionAdminParams.SYSTEM_COLL)) {
+ cluster.getSimClusterStateProvider().createSystemCollection();
+ CloudTestUtils.waitForState(cluster, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(1, 1, false, true));
+ }
+ }
+
+ /**
+ * Posts a 'suspend-trigger' command for the named trigger and asserts a "success" result.
+ * An exception whose text contains "No trigger exists" is deliberately ignored (the trigger
+ * may not be defined); any other exception is rethrown.
+ *
+ * @param solrClient client used to send the autoscaling request
+ * @param triggerName name of the trigger to suspend
+ */
+ private static void suspendTriggerIfExists(SolrClient solrClient, String triggerName) throws Exception {
+ String suspendTriggerCommand = "{" +
+ "'suspend-trigger' : {'name' : '" + triggerName + "'}" +
+ "}";
+ SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
+ try {
+ NamedList<Object> response = solrClient.request(req);
+ // expected value first, so a failure reports the actual server result correctly
+ assertEquals("success", response.get("result").toString());
+ } catch (Exception e) {
+ if (!e.toString().contains("No trigger exists")) {
+ throw e;
+ }
+ }
+ }
+
+ /** Listener that appends every event it receives to listenerEvents under its own config name. */
+ public static class TestTriggerListener extends TriggerListenerBase {
+ @Override
+ public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, AutoScalingConfig.TriggerListenerConfig config) throws TriggerValidationException {
+ super.configure(loader, cloudManager, config);
+ }
+
+ @Override
+ public synchronized void onEvent(TriggerEvent event, TriggerEventProcessorStage stage, String actionName,
+ ActionContext context, Throwable error, String message) {
+ // look up (or create) this listener's bucket first, then stamp the event with the cluster time
+ final List<CapturedEvent> bucket = listenerEvents.computeIfAbsent(config.name, name -> new ArrayList<>());
+ final long capturedAtNs = cluster.getTimeSource().getTimeNs();
+ final CapturedEvent captured = new CapturedEvent(capturedAtNs, context, config, stage, actionName, event, message);
+ bucket.add(captured);
+ }
+ }
+
+ public static class FinishTriggerAction extends TriggerActionBase { // action the tests list last (as 'test') in each trigger's action chain
+ @Override
+ public void process(TriggerEvent event, ActionContext context) throws Exception {
+ triggerFinishedCount.incrementAndGet(); // bump the counter before the latch is released below
+ triggerFinishedLatch.countDown();
+ }
+ }
+
+ /**
+ * Action the tests list first (as 'start') in a trigger's action chain; records that
+ * processing of an event has begun.
+ */
+ public static class StartTriggerAction extends TriggerActionBase {
+ @Override
+ public void process(TriggerEvent event, ActionContext context) throws Exception {
+ // Count first, then release the latch (same order as FinishTriggerAction): a thread woken by
+ // triggerStartedLatch must already observe the incremented triggerStartedCount, otherwise it
+ // could see started == finished == 0 and stop waiting too early.
+ triggerStartedCount.incrementAndGet();
+ triggerStartedLatch.countDown();
+ }
+ }
+
+ /**
+ * Registers a nodeLost trigger and a listener, creates a 5-shard collection on a subset of
+ * the live nodes, removes KILL_NODES nodes and waits for the collection to regain its shape,
+ * then repeatedly removes and restores FLAKY_NODES nodes. Finally asserts that the number of
+ * additional MOVEREPLICA operations is positive but smaller than the number of active
+ * replicas counted on the flaky nodes.
+ */
+ @Test
+ @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
+ public void testBasic() throws Exception {
+ SolrClient solrClient = cluster.simGetSolrClient();
+ String setTriggerCommand = "{" +
+ "'set-trigger' : {" +
+ "'name' : 'node_lost_trigger1'," +
+ "'event' : 'nodeLost'," +
+ "'waitFor' : '" + waitForSeconds + "s'," +
+ "'enabled' : true," +
+ "'actions' : [" +
+ "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
+ "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+ "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+ "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+ "]" +
+ "}}";
+ SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+ NamedList<Object> response = solrClient.request(req);
+ assertEquals("success", response.get("result").toString());
+
+ String setListenerCommand = "{" +
+ "'set-listener' : " +
+ "{" +
+ "'name' : 'foo'," +
+ "'trigger' : 'node_lost_trigger1'," +
+ "'stage' : ['STARTED','ABORTED','SUCCEEDED', 'FAILED']," +
+ "'beforeAction' : ['compute', 'execute']," +
+ "'afterAction' : ['compute', 'execute']," +
+ "'class' : '" + TestTriggerListener.class.getName() + "'" +
+ "}" +
+ "}";
+ req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
+ response = solrClient.request(req);
+ assertEquals("success", response.get("result").toString());
+
+ cluster.getTimeSource().sleep(5000);
+
+ // pick a few random nodes
+ List<String> nodes = new ArrayList<>();
+ int limit = 75;
+ for (String node : cluster.getClusterStateProvider().getLiveNodes()) {
+ nodes.add(node);
+ // the check runs after the add, so collection stops once limit + 1 nodes are held
+ if (nodes.size() > limit) {
+ break;
+ }
+ }
+ Collections.shuffle(nodes, random());
+ // create collection on these nodes
+ String collectionName = "testBasic";
+ CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+ "conf", 5, 5, 5, 5);
+ create.setMaxShardsPerNode(1);
+ create.setAutoAddReplicas(false);
+ create.setCreateNodeSet(String.join(",", nodes));
+ create.process(solrClient);
+
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
+
+ int KILL_NODES = 8;
+ // kill off a number of nodes
+ for (int i = 0; i < KILL_NODES; i++) {
+ cluster.simRemoveNode(nodes.get(i), false);
+ }
+ // should fully recover
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
+
+ log.info("OP COUNTS: " + cluster.simGetOpCounts());
+ // baseline taken before the flaky phase so only the additional operations are compared below
+ long moveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
+
+ // simulate a number of flaky nodes
+ int FLAKY_NODES = 10;
+ int flakyReplicas = 0;
+ for (int cnt = 0; cnt < 10; cnt++) {
+ for (int i = KILL_NODES; i < KILL_NODES + FLAKY_NODES; i++) {
+ // count the ACTIVE replicas on the node right before it is removed
+ flakyReplicas += cluster.getSimClusterStateProvider().simGetReplicaInfos(nodes.get(i))
+ .stream().filter(r -> r.getState().equals(Replica.State.ACTIVE)).count();
+ cluster.simRemoveNode(nodes.get(i), false);
+ }
+ cluster.getTimeSource().sleep(TimeUnit.SECONDS.toMillis(waitForSeconds) * 2);
+ for (int i = KILL_NODES; i < KILL_NODES + FLAKY_NODES; i++) {
+ final String nodeId = nodes.get(i);
+ cluster.submit(() -> cluster.getSimClusterStateProvider().simRestoreNode(nodeId));
+ }
+ }
+
+ // wait until started == finished
+ TimeOut timeOut = new TimeOut(20 * waitForSeconds * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
+ while (!timeOut.hasTimedOut()) {
+ if (triggerStartedCount.get() == triggerFinishedCount.get()) {
+ break;
+ }
+ timeOut.sleep(1000);
+ }
+ if (timeOut.hasTimedOut()) {
+ fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
+ }
+
+
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
+ long newMoveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
+ log.info("==== Flaky replicas: {}. Additional MOVEREPLICA count: {}", flakyReplicas, (newMoveReplicaOps - moveReplicaOps));
+ // flaky nodes lead to a number of MOVEREPLICA that is non-zero but lower than the number of flaky replicas
+ assertTrue("there should be new MOVEREPLICA ops", newMoveReplicaOps - moveReplicaOps > 0);
+ assertTrue("there should be less than flakyReplicas=" + flakyReplicas + " MOVEREPLICA ops",
+ newMoveReplicaOps - moveReplicaOps < flakyReplicas);
+ }
+
+ @Test
+ @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
+ public void testAddNode() throws Exception {
+ SolrClient solrClient = cluster.simGetSolrClient();
+ String setTriggerCommand = "{" +
+ "'set-trigger' : {" +
+ "'name' : 'node_added_trigger2'," +
+ "'event' : 'nodeAdded'," +
+ "'waitFor' : '" + waitForSeconds + "s'," +
+ "'enabled' : true," +
+ "'actions' : [" +
+ "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
+ "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+ "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+ "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+ "]" +
+ "}}";
+ SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+ NamedList<Object> response = solrClient.request(req);
+ assertEquals(response.get("result").toString(), "success");
+
+ // create a collection with more than 1 replica per node
+ String collectionName = "testNodeAdded";
+ CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+ "conf", NUM_NODES / 10, NUM_NODES / 8, NUM_NODES / 8, NUM_NODES / 8);
+ create.setMaxShardsPerNode(5);
+ create.setAutoAddReplicas(false);
+ create.process(solrClient);
+
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
+
+ // start adding nodes
+ int numAddNode = NUM_NODES / 5;
+ List<String> addNodesList = new ArrayList<>(numAddNode);
+ for (int i = 0; i < numAddNode; i++) {
+ addNodesList.add(cluster.simAddNode());
+ cluster.getTimeSource().sleep(5000);
+ }
+ // wait until at least one event is generated
+ boolean await = triggerStartedLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
+ assertTrue("trigger did not fire", await);
+
+ // wait until started == finished
+ TimeOut timeOut = new TimeOut(20 * waitForSeconds * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
+ while (!timeOut.hasTimedOut()) {
+ if (triggerStartedCount.get() == triggerFinishedCount.get()) {
+ break;
+ }
+ timeOut.sleep(1000);
+ }
+ if (timeOut.hasTimedOut()) {
+ fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
+ }
+
+ List<SolrInputDocument> systemColl = cluster.simGetSystemCollection();
+ int startedEventPos = -1;
+ for (int i = 0; i < systemColl.size(); i++) {
+ SolrInputDocument d = systemColl.get(i);
+ if (!"node_added_trigger2".equals(d.getFieldValue("event.source_s"))) {
+ continue;
+ }
+ if ("NODEADDED".equals(d.getFieldValue("event.type_s")) &&
+ "STARTED".equals(d.getFieldValue("stage_s"))) {
+ startedEventPos = i;
+ break;
+ }
+ }
+ assertTrue("no STARTED event", startedEventPos > -1);
+ SolrInputDocument startedEvent = systemColl.get(startedEventPos);
+ int lastIgnoredPos = startedEventPos;
+ // make sure some replicas have been moved
+ assertTrue("no MOVEREPLICA ops?", cluster.simGetOpCount("MOVEREPLICA") > 0);
+
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
+
+ int count = 50;
+ SolrInputDocument finishedEvent = null;
+ long lastNumOps = cluster.simGetOpCount("MOVEREPLICA");
+ while (count-- > 0) {
+ cluster.getTimeSource().sleep(10000);
+ long currentNumOps = cluster.simGetOpCount("MOVEREPLICA");
+ if (currentNumOps == lastNumOps) {
+ int size = systemColl.size() - 1;
+ for (int i = size; i > lastIgnoredPos; i--) {
+ SolrInputDocument d = systemColl.get(i);
+ if (!"node_added_trigger2".equals(d.getFieldValue("event.source_s"))) {
+ continue;
+ }
+ if ("SUCCEEDED".equals(d.getFieldValue("stage_s"))) {
+ finishedEvent = d;
+ break;
+ }
+ }
+ break;
+ } else {
+ lastNumOps = currentNumOps;
+ }
+ }
+
+ assertTrue("did not finish processing changes", finishedEvent != null);
+ long delta = (Long)finishedEvent.getFieldValue("event.time_l") - (Long)startedEvent.getFieldValue("event.time_l");
+ log.info("#### System stabilized after " + TimeUnit.NANOSECONDS.toMillis(delta) + " ms");
+ assertTrue("unexpected number of MOVEREPLICA ops", cluster.simGetOpCount("MOVEREPLICA") > 1);
+ }
+
+ /** Runs the shared node-lost scenario with the current waitForSeconds, a kill delay of 5000 and no required IGNORED events. */
+ @Test
+ @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
+ public void testNodeLost() throws Exception {
+ final long killDelay = 5000;
+ final int minIgnored = 0;
+ doTestNodeLost(waitForSeconds, killDelay, minIgnored);
+ }
+
+ // Renard R5 series - evenly covers a log10 range
+ private static final int[] renard5 = new int[] { // integer steps from 1 to 10
+ 1, 2, 3, 4, 6,
+ 10
+ };
+ private static final int[] renard5x = new int[] { // integer steps from 1 to 100; iterated by benchmarkNodeLost for both 'wait' and 'delay'
+ 1, 2, 3, 4, 6,
+ 10, 16, 25, 40, 63,
+ 100
+ };
+ private static final int[] renard5xx = new int[] { // integer steps from 1 to 10000
+ 1, 2, 3, 4, 6,
+ 10, 16, 25, 40, 63,
+ 100, 158, 251, 398, 631,
+ 1000, 1585, 2512, 3981, 6310,
+ 10000
+ };
+ // Renard R10 series
+ private static final double[] renard10 = new double[] { // ten steps per decade, from 1 to 10
+ 1, 1.3, 1.6, 2, 2.5, 3.2, 4, 5, 6.3, 7.9,
+ 10
+ };
+ private static final double[] renard10x = new double[] { // ten steps per decade, from 1 to 100
+ 1, 1.3, 1.6, 2, 2.5, 3.2, 4, 5, 6.3, 7.9,
+ 10, 12.6, 15.8, 20, 25.1, 31.6, 39.8, 50.1, 63.1, 79.4,
+ 100
+ };
+
+ private static final AtomicInteger ZERO = new AtomicInteger(0); // default passed to getOrDefault in benchmarkNodeLost when a counter is missing
+
+ /**
+ * Manual benchmark (its {@code @Test} annotation is commented out). For every (wait, delay)
+ * pair taken from renard5x it rebuilds the cluster five times, runs
+ * {@code doTestNodeLost(wait, delay * 1000, 0)}, and aggregates the elapsed time together with
+ * the STARTED / IGNORED event counts of the test's own trigger and of all other triggers.
+ * The aggregated rows are logged as a tab-separated table at the end.
+ */
+ //@Test
+ public void benchmarkNodeLost() throws Exception {
+ List<String> results = new ArrayList<>();
+ for (int wait : renard5x) {
+ for (int delay : renard5x) {
+ SummaryStatistics totalTime = new SummaryStatistics();
+ SummaryStatistics ignoredOurEvents = new SummaryStatistics();
+ SummaryStatistics ignoredOtherEvents = new SummaryStatistics();
+ SummaryStatistics startedOurEvents = new SummaryStatistics();
+ SummaryStatistics startedOtherEvents = new SummaryStatistics();
+ for (int i = 0; i < 5; i++) {
+ // start every run from a freshly configured cluster
+ if (cluster != null) {
+ cluster.close();
+ }
+ setupCluster();
+ setUp();
+ setupTest();
+ long total = doTestNodeLost(wait, delay * 1000, 0);
+ totalTime.addValue(total);
+ // get event counts
+ Map<String, Map<String, AtomicInteger>> counts = cluster.simGetEventCounts();
+ // doTestNodeLost registers its trigger as 'node_lost_trigger3'; looking up any other
+ // name yields null. Fall back to an empty map so a missing entry counts as zero.
+ Map<String, AtomicInteger> map = counts.remove("node_lost_trigger3");
+ if (map == null) {
+ map = Collections.emptyMap();
+ }
+ startedOurEvents.addValue(map.getOrDefault("STARTED", ZERO).get());
+ ignoredOurEvents.addValue(map.getOrDefault("IGNORED", ZERO).get());
+ // everything left in 'counts' belongs to other triggers
+ int otherStarted = 0;
+ int otherIgnored = 0;
+ for (Map<String, AtomicInteger> m : counts.values()) {
+ otherStarted += m.getOrDefault("STARTED", ZERO).get();
+ otherIgnored += m.getOrDefault("IGNORED", ZERO).get();
+ }
+ startedOtherEvents.addValue(otherStarted);
+ ignoredOtherEvents.addValue(otherIgnored);
+ }
+ results.add(String.format(Locale.ROOT, "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f",
+ wait, delay, startedOurEvents.getMean(), ignoredOurEvents.getMean(),
+ startedOtherEvents.getMean(), ignoredOtherEvents.getMean(),
+ totalTime.getMin(), totalTime.getMax(), totalTime.getMean(), totalTime.getStandardDeviation(), totalTime.getVariance()));
+ }
+ }
+ log.info("===== RESULTS ======");
+ log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar");
+ results.forEach(s -> log.info(s));
+ }
+
+ /**
+ * Shared node-lost scenario: registers the 'node_lost_trigger3' trigger and a 'failures'
+ * listener, creates a collection, removes NUM_NODES / 5 nodes with a pause between removals,
+ * and verifies that the trigger fired, that replicas were moved and that a SUCCEEDED event was
+ * recorded once the MOVEREPLICA count stopped changing.
+ *
+ * @param waitFor value for the trigger's 'waitFor' setting (an 's' suffix is appended); also scales the timeouts
+ * @param killDelay argument passed to the time source's sleep after each node removal
+ * @param minIgnored minimum number of IGNORED NODELOST events expected after the first STARTED event
+ * @return difference, converted from nanoseconds to milliseconds, between the 'event.time_l'
+ * values of the SUCCEEDED document found and the first STARTED document
+ */
+ private long doTestNodeLost(int waitFor, long killDelay, int minIgnored) throws Exception {
+ SolrClient solrClient = cluster.simGetSolrClient();
+ String setTriggerCommand = "{" +
+ "'set-trigger' : {" +
+ "'name' : 'node_lost_trigger3'," +
+ "'event' : 'nodeLost'," +
+ "'waitFor' : '" + waitFor + "s'," +
+ "'enabled' : true," +
+ "'actions' : [" +
+ "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
+ "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+ "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+ "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+ "]" +
+ "}}";
+ SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+ NamedList<Object> response = solrClient.request(req);
+ assertEquals("success", response.get("result").toString());
+
+ String setListenerCommand = "{" +
+ "'set-listener' : " +
+ "{" +
+ "'name' : 'failures'," +
+ "'trigger' : 'node_lost_trigger3'," +
+ "'stage' : ['FAILED']," +
+ "'class' : '" + TestTriggerListener.class.getName() + "'" +
+ "}" +
+ "}";
+ req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
+ response = solrClient.request(req);
+ assertEquals("success", response.get("result").toString());
+
+
+ // create a collection sized from NUM_NODES (NUM_NODES / 5 and NUM_NODES / 10, matching the
+ // clusterShape awaited below), allowing up to 5 shards per node
+ String collectionName = "testNodeLost";
+ CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+ "conf", NUM_NODES / 5, NUM_NODES / 10);
+ create.setMaxShardsPerNode(5);
+ create.setAutoAddReplicas(false);
+ create.process(solrClient);
+
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
+
+ // start killing nodes
+ int numNodes = NUM_NODES / 5;
+ List<String> nodes = new ArrayList<>(cluster.getLiveNodesSet().get());
+ for (int i = 0; i < numNodes; i++) {
+ // this may also select a node where a replica is moved to, so the total number of
+ // MOVEREPLICA may vary
+ cluster.simRemoveNode(nodes.get(i), false);
+ cluster.getTimeSource().sleep(killDelay);
+ }
+ // wait for the trigger to fire and complete at least once
+ boolean await = triggerFinishedLatch.await(20 * waitFor * 1000 / SPEED, TimeUnit.MILLISECONDS);
+ assertTrue("trigger did not fire within timeout, " +
+ "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+ await);
+ // find the first STARTED / NODELOST document written for this trigger
+ List<SolrInputDocument> systemColl = cluster.simGetSystemCollection();
+ int startedEventPos = -1;
+ for (int i = 0; i < systemColl.size(); i++) {
+ SolrInputDocument d = systemColl.get(i);
+ if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
+ continue;
+ }
+ if ("NODELOST".equals(d.getFieldValue("event.type_s")) &&
+ "STARTED".equals(d.getFieldValue("stage_s"))) {
+ startedEventPos = i;
+ break;
+ }
+ }
+ assertTrue("no STARTED event: " + systemColl + ", " +
+ "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+ startedEventPos > -1);
+ SolrInputDocument startedEvent = systemColl.get(startedEventPos);
+ // we can expect some failures when target node in MOVEREPLICA has been killed
+ // between when the event processing started and the actual moment of MOVEREPLICA execution
+ // wait until started == (finished + failed)
+ // NOTE(review): the loop below only compares started with finished; the failure allowance is
+ // applied after the timeout. listenerEvents.size() is the number of listener names that
+ // captured something, not the number of FAILED events - confirm this is intended.
+ TimeOut timeOut = new TimeOut(20 * waitFor * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
+ while (!timeOut.hasTimedOut()) {
+ if (triggerStartedCount.get() == triggerFinishedCount.get()) {
+ break;
+ }
+ log.debug("started={}, finished={}, failed={}",
+ triggerStartedCount.get(), triggerFinishedCount.get(), listenerEvents.size());
+ timeOut.sleep(1000);
+ }
+ if (timeOut.hasTimedOut()) {
+ if (triggerStartedCount.get() > triggerFinishedCount.get() + listenerEvents.size()) {
+ fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get() +
+ ", failed=" + listenerEvents.size());
+ }
+ }
+ // count IGNORED NODELOST documents after the STARTED one and remember the last position
+ int ignored = 0;
+ int lastIgnoredPos = startedEventPos;
+ for (int i = startedEventPos + 1; i < systemColl.size(); i++) {
+ SolrInputDocument d = systemColl.get(i);
+ if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
+ continue;
+ }
+ if ("NODELOST".equals(d.getFieldValue("event.type_s"))) {
+ if ("IGNORED".equals(d.getFieldValue("stage_s"))) {
+ ignored++;
+ lastIgnoredPos = i;
+ }
+ }
+ }
+ assertTrue("should be at least " + minIgnored + " IGNORED events, " +
+ "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+ ignored >= minIgnored);
+ // make sure some replicas have been moved
+ assertTrue("no MOVEREPLICA ops? " +
+ "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+ cluster.simGetOpCount("MOVEREPLICA") > 0);
+
+ if (listenerEvents.isEmpty()) {
+ // no failed movements - verify collection shape
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
+ } else {
+ cluster.getTimeSource().sleep(NUM_NODES * 100);
+ }
+
+ // poll (at most 50 times) until the MOVEREPLICA count is unchanged between two samples,
+ // then scan backwards (down to the last IGNORED position) for a SUCCEEDED document
+ int count = 50;
+ SolrInputDocument finishedEvent = null;
+ long lastNumOps = cluster.simGetOpCount("MOVEREPLICA");
+ while (count-- > 0) {
+ cluster.getTimeSource().sleep(waitFor * 10000);
+ long currentNumOps = cluster.simGetOpCount("MOVEREPLICA");
+ if (currentNumOps == lastNumOps) {
+ int size = systemColl.size() - 1;
+ for (int i = size; i > lastIgnoredPos; i--) {
+ SolrInputDocument d = systemColl.get(i);
+ if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
+ continue;
+ }
+ if ("SUCCEEDED".equals(d.getFieldValue("stage_s"))) {
+ finishedEvent = d;
+ break;
+ }
+ }
+ break;
+ } else {
+ lastNumOps = currentNumOps;
+ }
+ }
+
+ assertNotNull("did not finish processing changes, " +
+ "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+ finishedEvent);
+ long delta = (Long)finishedEvent.getFieldValue("event.time_l") - (Long)startedEvent.getFieldValue("event.time_l");
+ delta = TimeUnit.NANOSECONDS.toMillis(delta);
+ log.info("#### System stabilized after " + delta + " ms");
+ long ops = cluster.simGetOpCount("MOVEREPLICA");
+ // a lower bar applies when the 'failures' listener captured anything
+ long expectedMinOps = 40;
+ if (!listenerEvents.isEmpty()) {
+ expectedMinOps = 20;
+ }
+ assertTrue("unexpected number (" + expectedMinOps + ") of MOVEREPLICA ops: " + ops + ", " +
+ "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
+ ops >= expectedMinOps);
+ return delta;
+ }
+
+ /**
+ * Creates a collection, sets the request-rate metric for shard1, then registers a searchRate
+ * trigger with a listener for FAILED / SUCCEEDED stages. Asserts that exactly one event is
+ * captured, that its hot nodes are the nodes hosting shard1 replicas, and that the single
+ * requested operation is an ADDREPLICA hinted at (collection, "shard1").
+ */
+ @Test
+ //commented 2-Aug-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
+ public void testSearchRate() throws Exception {
+ SolrClient solrClient = cluster.simGetSolrClient();
+ String collectionName = "testSearchRate";
+ CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+ "conf", 2, 10);
+ create.process(solrClient);
+
+ log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
+ CloudTestUtils.clusterShape(2, 10, false, true)) + " ms");
+
+ // collect the node names for shard1
+ Set<String> nodes = new HashSet<>();
+ cluster.getSimClusterStateProvider().getClusterState().getCollection(collectionName)
+ .getSlice("shard1")
+ .getReplicas()
+ .forEach(r -> nodes.add(r.getNodeName()));
+
+ String metricName = "QUERY./select.requestTimes:1minRate";
+ // simulate search traffic
+ cluster.getSimClusterStateProvider().simSetShardValue(collectionName, "shard1", metricName, 40, false, true);
+
+ // now define the trigger. doing it earlier may cause partial events to be generated (where only some
+ // nodes / replicas exceeded the threshold).
+ String setTriggerCommand = "{" +
+ "'set-trigger' : {" +
+ "'name' : 'search_rate_trigger'," +
+ "'event' : 'searchRate'," +
+ "'waitFor' : '" + waitForSeconds + "s'," +
+ "'aboveRate' : 1.0," +
+ "'aboveNodeRate' : 1.0," +
+ "'enabled' : true," +
+ "'actions' : [" +
+ "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+ "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+ "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
+ "]" +
+ "}}";
+ SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+ NamedList<Object> response = solrClient.request(req);
+ assertEquals("success", response.get("result").toString());
+ String setListenerCommand1 = "{" +
+ "'set-listener' : " +
+ "{" +
+ "'name' : 'srt'," +
+ "'trigger' : 'search_rate_trigger'," +
+ "'stage' : ['FAILED','SUCCEEDED']," +
+ "'class' : '" + TestTriggerListener.class.getName() + "'" +
+ "}" +
+ "}";
+ req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand1);
+ response = solrClient.request(req);
+ assertEquals("success", response.get("result").toString());
+
+
+ boolean await = triggerFinishedLatch.await(waitForSeconds * 20000 / SPEED, TimeUnit.MILLISECONDS);
+ assertTrue("The trigger did not fire at all", await);
+ // wait for listener to capture the SUCCEEDED stage
+ cluster.getTimeSource().sleep(2000);
+ // fail with a readable message (rather than a NullPointerException) when nothing was captured
+ List<CapturedEvent> srtEvents = listenerEvents.get("srt");
+ assertNotNull("no events captured by the 'srt' listener: " + listenerEvents, srtEvents);
+ assertEquals(listenerEvents.toString(), 1, srtEvents.size());
+ CapturedEvent ev = srtEvents.get(0);
+ assertEquals(TriggerEventType.SEARCHRATE, ev.event.getEventType());
+ Map<String, Number> m = (Map<String, Number>)ev.event.getProperty(SearchRateTrigger.HOT_NODES);
+ assertNotNull(m);
+ assertEquals(nodes.size(), m.size());
+ assertEquals(nodes, m.keySet());
+ // every hot node is expected to report a rate of 4.0 (presumably the value 40 set above
+ // spread over the shard's 10 replicas - confirm against simSetShardValue)
+ m.forEach((k, v) -> assertEquals(4.0, v.doubleValue(), 0.01));
+ List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+ assertNotNull(ops);
+ assertEquals(ops.toString(), 1, ops.size());
+ ops.forEach(op -> {
+ assertEquals(CollectionParams.CollectionAction.ADDREPLICA, op.getAction());
+ assertEquals(1, op.getHints().size());
+ Object o = op.getHints().get(Suggester.Hint.COLL_SHARD);
+ // this may be a pair or a HashSet of pairs with size 1
+ Pair<String, String> hint = null;
+ if (o instanceof Pair) {
+ hint = (Pair<String, String>)o;
+ } else if (o instanceof Set) {
+ assertEquals("unexpected number of hints: " + o, 1, ((Set<?>)o).size());
+ o = ((Set<?>)o).iterator().next();
+ assertTrue("unexpected hint: " + o, o instanceof Pair);
+ hint = (Pair<String, String>)o;
+ } else {
+ fail("unexpected hints: " + o);
+ }
+ assertNotNull(hint);
+ assertEquals(collectionName, hint.first());
+ assertEquals("shard1", hint.second());
+ });
+ }
+}