You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/08/27 13:14:49 UTC
[5/6] lucene-solr:master: SOLR-12669: Rename tests that use the
autoscaling simulation framework.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a3f837a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
deleted file mode 100644
index 93f92ec..0000000
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
+++ /dev/null
@@ -1,727 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.cloud.autoscaling.sim;
-
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrRequest;
-import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
-import org.apache.solr.client.solrj.cloud.SolrCloudManager;
-import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
-import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
-import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
-import org.apache.solr.cloud.autoscaling.ActionContext;
-import org.apache.solr.cloud.autoscaling.ComputePlanAction;
-import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
-import org.apache.solr.cloud.autoscaling.SearchRateTrigger;
-import org.apache.solr.cloud.autoscaling.TriggerActionBase;
-import org.apache.solr.cloud.autoscaling.TriggerEvent;
-import org.apache.solr.cloud.autoscaling.TriggerListenerBase;
-import org.apache.solr.cloud.autoscaling.CapturedEvent;
-import org.apache.solr.cloud.autoscaling.TriggerValidationException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.params.CollectionAdminParams;
-import org.apache.solr.common.params.CollectionParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.Pair;
-import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.util.LogLevel;
-import org.apache.solr.util.TimeOut;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
-
-/**
- *
- */
-@TimeoutSuite(millis = 4 * 3600 * 1000)
-@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
-@ThreadLeakLingering(linger = 20000) // ComputePlanAction may take significant time to complete
-//05-Jul-2018 @LuceneTestCase.BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12075")
-public class TestLargeCluster extends SimSolrCloudTestCase {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public static final int SPEED = 50;
-
- public static final int NUM_NODES = 100;
-
- static Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();
- static AtomicInteger triggerFinishedCount = new AtomicInteger();
- static AtomicInteger triggerStartedCount = new AtomicInteger();
- static CountDownLatch triggerStartedLatch;
- static CountDownLatch triggerFinishedLatch;
- static int waitForSeconds;
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- configureCluster(NUM_NODES, TimeSource.get("simTime:" + SPEED));
- }
-
- @Before
- public void setupTest() throws Exception {
- waitForSeconds = 5;
- triggerStartedCount.set(0);
- triggerFinishedCount.set(0);
- triggerStartedLatch = new CountDownLatch(1);
- triggerFinishedLatch = new CountDownLatch(1);
- listenerEvents.clear();
- // disable .scheduled_maintenance and .auto_add_replicas
- String suspendTriggerCommand = "{" +
- "'suspend-trigger' : {'name' : '.scheduled_maintenance'}" +
- "}";
- SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
- SolrClient solrClient = cluster.simGetSolrClient();
- NamedList<Object> response;
- try {
- response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
- } catch (Exception e) {
- if (!e.toString().contains("No trigger exists")) {
- throw e;
- }
- }
- suspendTriggerCommand = "{" +
- "'suspend-trigger' : {'name' : '.auto_add_replicas'}" +
- "}";
- req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
- try {
- response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
- } catch (Exception e) {
- if (!e.toString().contains("No trigger exists")) {
- throw e;
- }
- }
-
- // do this in advance if missing
- if (!cluster.getSimClusterStateProvider().simListCollections().contains(CollectionAdminParams.SYSTEM_COLL)) {
- cluster.getSimClusterStateProvider().createSystemCollection();
- CloudTestUtils.waitForState(cluster, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(1, 1, false, true));
- }
- }
-
- public static class TestTriggerListener extends TriggerListenerBase {
- @Override
- public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, AutoScalingConfig.TriggerListenerConfig config) throws TriggerValidationException {
- super.configure(loader, cloudManager, config);
- }
-
- @Override
- public synchronized void onEvent(TriggerEvent event, TriggerEventProcessorStage stage, String actionName,
- ActionContext context, Throwable error, String message) {
- List<CapturedEvent> lst = listenerEvents.computeIfAbsent(config.name, s -> new ArrayList<>());
- lst.add(new CapturedEvent(cluster.getTimeSource().getTimeNs(), context, config, stage, actionName, event, message));
- }
- }
-
- public static class FinishTriggerAction extends TriggerActionBase {
- @Override
- public void process(TriggerEvent event, ActionContext context) throws Exception {
- triggerFinishedCount.incrementAndGet();
- triggerFinishedLatch.countDown();
- }
- }
-
- public static class StartTriggerAction extends TriggerActionBase {
- @Override
- public void process(TriggerEvent event, ActionContext context) throws Exception {
- triggerStartedLatch.countDown();
- triggerStartedCount.incrementAndGet();
- }
- }
-
- @Test
- @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
- public void testBasic() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String setTriggerCommand = "{" +
- "'set-trigger' : {" +
- "'name' : 'node_lost_trigger1'," +
- "'event' : 'nodeLost'," +
- "'waitFor' : '" + waitForSeconds + "s'," +
- "'enabled' : true," +
- "'actions' : [" +
- "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
- "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
- "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
- "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
- "]" +
- "}}";
- SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
- NamedList<Object> response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
-
- String setListenerCommand = "{" +
- "'set-listener' : " +
- "{" +
- "'name' : 'foo'," +
- "'trigger' : 'node_lost_trigger1'," +
- "'stage' : ['STARTED','ABORTED','SUCCEEDED', 'FAILED']," +
- "'beforeAction' : ['compute', 'execute']," +
- "'afterAction' : ['compute', 'execute']," +
- "'class' : '" + TestTriggerListener.class.getName() + "'" +
- "}" +
- "}";
- req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
- response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
-
- cluster.getTimeSource().sleep(5000);
-
- // pick a few random nodes
- List<String> nodes = new ArrayList<>();
- int limit = 75;
- for (String node : cluster.getClusterStateProvider().getLiveNodes()) {
- nodes.add(node);
- if (nodes.size() > limit) {
- break;
- }
- }
- Collections.shuffle(nodes, random());
- // create collection on these nodes
- String collectionName = "testBasic";
- CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
- "conf", 5, 5, 5, 5);
- create.setMaxShardsPerNode(1);
- create.setAutoAddReplicas(false);
- create.setCreateNodeSet(String.join(",", nodes));
- create.process(solrClient);
-
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
-
- int KILL_NODES = 8;
- // kill off a number of nodes
- for (int i = 0; i < KILL_NODES; i++) {
- cluster.simRemoveNode(nodes.get(i), false);
- }
- // should fully recover
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
-
- log.info("OP COUNTS: " + cluster.simGetOpCounts());
- long moveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
-
- // simulate a number of flaky nodes
- int FLAKY_NODES = 10;
- int flakyReplicas = 0;
- for (int cnt = 0; cnt < 10; cnt++) {
- for (int i = KILL_NODES; i < KILL_NODES + FLAKY_NODES; i++) {
- flakyReplicas += cluster.getSimClusterStateProvider().simGetReplicaInfos(nodes.get(i))
- .stream().filter(r -> r.getState().equals(Replica.State.ACTIVE)).count();
- cluster.simRemoveNode(nodes.get(i), false);
- }
- cluster.getTimeSource().sleep(TimeUnit.SECONDS.toMillis(waitForSeconds) * 2);
- for (int i = KILL_NODES; i < KILL_NODES + FLAKY_NODES; i++) {
- final String nodeId = nodes.get(i);
- cluster.submit(() -> cluster.getSimClusterStateProvider().simRestoreNode(nodeId));
- }
- }
-
- // wait until started == finished
- TimeOut timeOut = new TimeOut(20 * waitForSeconds * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
- while (!timeOut.hasTimedOut()) {
- if (triggerStartedCount.get() == triggerFinishedCount.get()) {
- break;
- }
- timeOut.sleep(1000);
- }
- if (timeOut.hasTimedOut()) {
- fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
- }
-
-
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
- long newMoveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
- log.info("==== Flaky replicas: {}. Additional MOVEREPLICA count: {}", flakyReplicas, (newMoveReplicaOps - moveReplicaOps));
- // flaky nodes lead to a number of MOVEREPLICA that is non-zero but lower than the number of flaky replicas
- assertTrue("there should be new MOVERPLICA ops", newMoveReplicaOps - moveReplicaOps > 0);
- assertTrue("there should be less than flakyReplicas=" + flakyReplicas + " MOVEREPLICA ops",
- newMoveReplicaOps - moveReplicaOps < flakyReplicas);
- }
-
- @Test
- @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 28-June-2018
- public void testAddNode() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String setTriggerCommand = "{" +
- "'set-trigger' : {" +
- "'name' : 'node_added_trigger2'," +
- "'event' : 'nodeAdded'," +
- "'waitFor' : '" + waitForSeconds + "s'," +
- "'enabled' : true," +
- "'actions' : [" +
- "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
- "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
- "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
- "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
- "]" +
- "}}";
- SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
- NamedList<Object> response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
-
- // create a collection with more than 1 replica per node
- String collectionName = "testNodeAdded";
- CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
- "conf", NUM_NODES / 10, NUM_NODES / 8, NUM_NODES / 8, NUM_NODES / 8);
- create.setMaxShardsPerNode(5);
- create.setAutoAddReplicas(false);
- create.process(solrClient);
-
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
-
- // start adding nodes
- int numAddNode = NUM_NODES / 5;
- List<String> addNodesList = new ArrayList<>(numAddNode);
- for (int i = 0; i < numAddNode; i++) {
- addNodesList.add(cluster.simAddNode());
- cluster.getTimeSource().sleep(5000);
- }
- // wait until at least one event is generated
- boolean await = triggerStartedLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
- assertTrue("trigger did not fire", await);
-
- // wait until started == finished
- TimeOut timeOut = new TimeOut(20 * waitForSeconds * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
- while (!timeOut.hasTimedOut()) {
- if (triggerStartedCount.get() == triggerFinishedCount.get()) {
- break;
- }
- timeOut.sleep(1000);
- }
- if (timeOut.hasTimedOut()) {
- fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
- }
-
- List<SolrInputDocument> systemColl = cluster.simGetSystemCollection();
- int startedEventPos = -1;
- for (int i = 0; i < systemColl.size(); i++) {
- SolrInputDocument d = systemColl.get(i);
- if (!"node_added_trigger2".equals(d.getFieldValue("event.source_s"))) {
- continue;
- }
- if ("NODEADDED".equals(d.getFieldValue("event.type_s")) &&
- "STARTED".equals(d.getFieldValue("stage_s"))) {
- startedEventPos = i;
- break;
- }
- }
- assertTrue("no STARTED event", startedEventPos > -1);
- SolrInputDocument startedEvent = systemColl.get(startedEventPos);
- int lastIgnoredPos = startedEventPos;
- // make sure some replicas have been moved
- assertTrue("no MOVEREPLICA ops?", cluster.simGetOpCount("MOVEREPLICA") > 0);
-
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
-
- int count = 50;
- SolrInputDocument finishedEvent = null;
- long lastNumOps = cluster.simGetOpCount("MOVEREPLICA");
- while (count-- > 0) {
- cluster.getTimeSource().sleep(10000);
- long currentNumOps = cluster.simGetOpCount("MOVEREPLICA");
- if (currentNumOps == lastNumOps) {
- int size = systemColl.size() - 1;
- for (int i = size; i > lastIgnoredPos; i--) {
- SolrInputDocument d = systemColl.get(i);
- if (!"node_added_trigger2".equals(d.getFieldValue("event.source_s"))) {
- continue;
- }
- if ("SUCCEEDED".equals(d.getFieldValue("stage_s"))) {
- finishedEvent = d;
- break;
- }
- }
- break;
- } else {
- lastNumOps = currentNumOps;
- }
- }
-
- assertTrue("did not finish processing changes", finishedEvent != null);
- long delta = (Long)finishedEvent.getFieldValue("event.time_l") - (Long)startedEvent.getFieldValue("event.time_l");
- log.info("#### System stabilized after " + TimeUnit.NANOSECONDS.toMillis(delta) + " ms");
- assertTrue("unexpected number of MOVEREPLICA ops", cluster.simGetOpCount("MOVEREPLICA") > 1);
- }
-
- @Test
- @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
- public void testNodeLost() throws Exception {
- doTestNodeLost(waitForSeconds, 5000, 0);
- }
-
- // Renard R5 series - evenly covers a log10 range
- private static final int[] renard5 = new int[] {
- 1, 2, 3, 4, 6,
- 10
- };
- private static final int[] renard5x = new int[] {
- 1, 2, 3, 4, 6,
- 10, 16, 25, 40, 63,
- 100
- };
- private static final int[] renard5xx = new int[] {
- 1, 2, 3, 4, 6,
- 10, 16, 25, 40, 63,
- 100, 158, 251, 398, 631,
- 1000, 1585, 2512, 3981, 6310,
- 10000
- };
- // Renard R10 series
- private static final double[] renard10 = new double[] {
- 1, 1.3, 1.6, 2, 2.5, 3.2, 4, 5, 6.3, 7.9,
- 10
- };
- private static final double[] renard10x = new double[] {
- 1, 1.3, 1.6, 2, 2.5, 3.2, 4, 5, 6.3, 7.9,
- 10, 12.6, 15.8, 20, 25.1, 31.6, 39.8, 50.1, 63.1, 79.4,
- 100
- };
-
- private static final AtomicInteger ZERO = new AtomicInteger(0);
-
- //@Test
- public void benchmarkNodeLost() throws Exception {
- List<String> results = new ArrayList<>();
- for (int wait : renard5x) {
- for (int delay : renard5x) {
- SummaryStatistics totalTime = new SummaryStatistics();
- SummaryStatistics ignoredOurEvents = new SummaryStatistics();
- SummaryStatistics ignoredOtherEvents = new SummaryStatistics();
- SummaryStatistics startedOurEvents = new SummaryStatistics();
- SummaryStatistics startedOtherEvents = new SummaryStatistics();
- for (int i = 0; i < 5; i++) {
- if (cluster != null) {
- cluster.close();
- }
- setupCluster();
- setUp();
- setupTest();
- long total = doTestNodeLost(wait, delay * 1000, 0);
- totalTime.addValue(total);
- // get event counts
- Map<String, Map<String, AtomicInteger>> counts = cluster.simGetEventCounts();
- Map<String, AtomicInteger> map = counts.remove("node_lost_trigger");
- startedOurEvents.addValue(map.getOrDefault("STARTED", ZERO).get());
- ignoredOurEvents.addValue(map.getOrDefault("IGNORED", ZERO).get());
- int otherStarted = 0;
- int otherIgnored = 0;
- for (Map<String, AtomicInteger> m : counts.values()) {
- otherStarted += m.getOrDefault("STARTED", ZERO).get();
- otherIgnored += m.getOrDefault("IGNORED", ZERO).get();
- }
- startedOtherEvents.addValue(otherStarted);
- ignoredOtherEvents.addValue(otherIgnored);
- }
- results.add(String.format(Locale.ROOT, "%d\t%d\t%4.0f\t%4.0f\t%4.0f\t%4.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f\t%6.0f",
- wait, delay, startedOurEvents.getMean(), ignoredOurEvents.getMean(),
- startedOtherEvents.getMean(), ignoredOtherEvents.getMean(),
- totalTime.getMin(), totalTime.getMax(), totalTime.getMean(), totalTime.getStandardDeviation(), totalTime.getVariance()));
- }
- }
- log.info("===== RESULTS ======");
- log.info("waitFor\tdelay\tSTRT\tIGN\toSTRT\toIGN\tmin\tmax\tmean\tstdev\tvar");
- results.forEach(s -> log.info(s));
- }
-
- private long doTestNodeLost(int waitFor, long killDelay, int minIgnored) throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String setTriggerCommand = "{" +
- "'set-trigger' : {" +
- "'name' : 'node_lost_trigger3'," +
- "'event' : 'nodeLost'," +
- "'waitFor' : '" + waitFor + "s'," +
- "'enabled' : true," +
- "'actions' : [" +
- "{'name':'start','class':'" + StartTriggerAction.class.getName() + "'}," +
- "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
- "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
- "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
- "]" +
- "}}";
- SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
- NamedList<Object> response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
-
- String setListenerCommand = "{" +
- "'set-listener' : " +
- "{" +
- "'name' : 'failures'," +
- "'trigger' : 'node_lost_trigger3'," +
- "'stage' : ['FAILED']," +
- "'class' : '" + TestTriggerListener.class.getName() + "'" +
- "}" +
- "}";
- req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
- response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
-
-
- // create a collection with 1 replica per node
- String collectionName = "testNodeLost";
- CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
- "conf", NUM_NODES / 5, NUM_NODES / 10);
- create.setMaxShardsPerNode(5);
- create.setAutoAddReplicas(false);
- create.process(solrClient);
-
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
-
- // start killing nodes
- int numNodes = NUM_NODES / 5;
- List<String> nodes = new ArrayList<>(cluster.getLiveNodesSet().get());
- for (int i = 0; i < numNodes; i++) {
- // this may also select a node where a replica is moved to, so the total number of
- // MOVEREPLICA may vary
- cluster.simRemoveNode(nodes.get(i), false);
- cluster.getTimeSource().sleep(killDelay);
- }
- // wait for the trigger to fire and complete at least once
- boolean await = triggerFinishedLatch.await(20 * waitFor * 1000 / SPEED, TimeUnit.MILLISECONDS);
- assertTrue("trigger did not fire within timeout, " +
- "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
- await);
- List<SolrInputDocument> systemColl = cluster.simGetSystemCollection();
- int startedEventPos = -1;
- for (int i = 0; i < systemColl.size(); i++) {
- SolrInputDocument d = systemColl.get(i);
- if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
- continue;
- }
- if ("NODELOST".equals(d.getFieldValue("event.type_s")) &&
- "STARTED".equals(d.getFieldValue("stage_s"))) {
- startedEventPos = i;
- break;
- }
- }
- assertTrue("no STARTED event: " + systemColl + ", " +
- "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
- startedEventPos > -1);
- SolrInputDocument startedEvent = systemColl.get(startedEventPos);
- // we can expect some failures when target node in MOVEREPLICA has been killed
- // between when the event processing started and the actual moment of MOVEREPLICA execution
- // wait until started == (finished + failed)
- TimeOut timeOut = new TimeOut(20 * waitFor * NUM_NODES, TimeUnit.SECONDS, cluster.getTimeSource());
- while (!timeOut.hasTimedOut()) {
- if (triggerStartedCount.get() == triggerFinishedCount.get()) {
- break;
- }
- log.debug("started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get() +
- ", failed=" + listenerEvents.size());
- timeOut.sleep(1000);
- }
- if (timeOut.hasTimedOut()) {
- if (triggerStartedCount.get() > triggerFinishedCount.get() + listenerEvents.size()) {
- fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get() +
- ", failed=" + listenerEvents.size());
- }
- }
- int ignored = 0;
- int lastIgnoredPos = startedEventPos;
- for (int i = startedEventPos + 1; i < systemColl.size(); i++) {
- SolrInputDocument d = systemColl.get(i);
- if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
- continue;
- }
- if ("NODELOST".equals(d.getFieldValue("event.type_s"))) {
- if ("IGNORED".equals(d.getFieldValue("stage_s"))) {
- ignored++;
- lastIgnoredPos = i;
- }
- }
- }
- assertTrue("should be at least " + minIgnored + " IGNORED events, " +
- "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
- ignored >= minIgnored);
- // make sure some replicas have been moved
- assertTrue("no MOVEREPLICA ops? " +
- "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
- cluster.simGetOpCount("MOVEREPLICA") > 0);
-
- if (listenerEvents.isEmpty()) {
- // no failed movements - verify collection shape
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
- } else {
- cluster.getTimeSource().sleep(NUM_NODES * 100);
- }
-
- int count = 50;
- SolrInputDocument finishedEvent = null;
- long lastNumOps = cluster.simGetOpCount("MOVEREPLICA");
- while (count-- > 0) {
- cluster.getTimeSource().sleep(waitFor * 10000);
- long currentNumOps = cluster.simGetOpCount("MOVEREPLICA");
- if (currentNumOps == lastNumOps) {
- int size = systemColl.size() - 1;
- for (int i = size; i > lastIgnoredPos; i--) {
- SolrInputDocument d = systemColl.get(i);
- if (!"node_lost_trigger3".equals(d.getFieldValue("event.source_s"))) {
- continue;
- }
- if ("SUCCEEDED".equals(d.getFieldValue("stage_s"))) {
- finishedEvent = d;
- break;
- }
- }
- break;
- } else {
- lastNumOps = currentNumOps;
- }
- }
-
- assertTrue("did not finish processing changes, " +
- "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
- finishedEvent != null);
- long delta = (Long)finishedEvent.getFieldValue("event.time_l") - (Long)startedEvent.getFieldValue("event.time_l");
- delta = TimeUnit.NANOSECONDS.toMillis(delta);
- log.info("#### System stabilized after " + delta + " ms");
- long ops = cluster.simGetOpCount("MOVEREPLICA");
- long expectedMinOps = 40;
- if (!listenerEvents.isEmpty()) {
- expectedMinOps = 20;
- }
- assertTrue("unexpected number (" + expectedMinOps + ") of MOVEREPLICA ops: " + ops + ", " +
- "waitFor=" + waitFor + ", killDelay=" + killDelay + ", minIgnored=" + minIgnored,
- ops >= expectedMinOps);
- return delta;
- }
-
- @Test
- //commented 2-Aug-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
- public void testSearchRate() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String collectionName = "testSearchRate";
- CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
- "conf", 2, 10);
- create.process(solrClient);
-
- log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
- CloudTestUtils.clusterShape(2, 10, false, true)) + " ms");
-
- // collect the node names for shard1
- Set<String> nodes = new HashSet<>();
- cluster.getSimClusterStateProvider().getClusterState().getCollection(collectionName)
- .getSlice("shard1")
- .getReplicas()
- .forEach(r -> nodes.add(r.getNodeName()));
-
- String metricName = "QUERY./select.requestTimes:1minRate";
- // simulate search traffic
- cluster.getSimClusterStateProvider().simSetShardValue(collectionName, "shard1", metricName, 40, false, true);
-
- // now define the trigger. doing it earlier may cause partial events to be generated (where only some
- // nodes / replicas exceeded the threshold).
- String setTriggerCommand = "{" +
- "'set-trigger' : {" +
- "'name' : 'search_rate_trigger'," +
- "'event' : 'searchRate'," +
- "'waitFor' : '" + waitForSeconds + "s'," +
- "'aboveRate' : 1.0," +
- "'aboveNodeRate' : 1.0," +
- "'enabled' : true," +
- "'actions' : [" +
- "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
- "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
- "{'name':'test','class':'" + FinishTriggerAction.class.getName() + "'}" +
- "]" +
- "}}";
- SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
- NamedList<Object> response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
- String setListenerCommand1 = "{" +
- "'set-listener' : " +
- "{" +
- "'name' : 'srt'," +
- "'trigger' : 'search_rate_trigger'," +
- "'stage' : ['FAILED','SUCCEEDED']," +
- "'class' : '" + TestTriggerListener.class.getName() + "'" +
- "}" +
- "}";
- req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand1);
- response = solrClient.request(req);
- assertEquals(response.get("result").toString(), "success");
-
-
- boolean await = triggerFinishedLatch.await(waitForSeconds * 20000 / SPEED, TimeUnit.MILLISECONDS);
- assertTrue("The trigger did not fire at all", await);
- // wait for listener to capture the SUCCEEDED stage
- cluster.getTimeSource().sleep(2000);
- assertEquals(listenerEvents.toString(), 1, listenerEvents.get("srt").size());
- CapturedEvent ev = listenerEvents.get("srt").get(0);
- assertEquals(TriggerEventType.SEARCHRATE, ev.event.getEventType());
- Map<String, Number> m = (Map<String, Number>)ev.event.getProperty(SearchRateTrigger.HOT_NODES);
- assertNotNull(m);
- assertEquals(nodes.size(), m.size());
- assertEquals(nodes, m.keySet());
- m.forEach((k, v) -> assertEquals(4.0, v.doubleValue(), 0.01));
- List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
- assertNotNull(ops);
- assertEquals(ops.toString(), 1, ops.size());
- ops.forEach(op -> {
- assertEquals(CollectionParams.CollectionAction.ADDREPLICA, op.getAction());
- assertEquals(1, op.getHints().size());
- Object o = op.getHints().get(Suggester.Hint.COLL_SHARD);
- // this may be a pair or a HashSet of pairs with size 1
- Pair<String, String> hint = null;
- if (o instanceof Pair) {
- hint = (Pair<String, String>)o;
- } else if (o instanceof Set) {
- assertEquals("unexpected number of hints: " + o, 1, ((Set)o).size());
- o = ((Set)o).iterator().next();
- assertTrue("unexpected hint: " + o, o instanceof Pair);
- hint = (Pair<String, String>)o;
- } else {
- fail("unexpected hints: " + o);
- }
- assertNotNull(hint);
- assertEquals(collectionName, hint.first());
- assertEquals("shard1", hint.second());
- });
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a3f837a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeAddedTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeAddedTrigger.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeAddedTrigger.java
deleted file mode 100644
index 04e8c1d..0000000
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeAddedTrigger.java
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.cloud.autoscaling.sim;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
-
-import org.apache.solr.client.solrj.cloud.SolrCloudManager;
-import org.apache.solr.cloud.autoscaling.ActionContext;
-import org.apache.solr.cloud.autoscaling.AutoScaling;
-import org.apache.solr.cloud.autoscaling.NodeAddedTrigger;
-import org.apache.solr.cloud.autoscaling.TriggerAction;
-import org.apache.solr.cloud.autoscaling.TriggerEvent;
-import org.apache.solr.cloud.autoscaling.TriggerValidationException;
-import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.core.SolrResourceLoader;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Test for {@link NodeAddedTrigger}
- */
-public class TestNodeAddedTrigger extends SimSolrCloudTestCase {
- private static AtomicBoolean actionConstructorCalled = new AtomicBoolean(false);
- private static AtomicBoolean actionInitCalled = new AtomicBoolean(false);
- private static AtomicBoolean actionCloseCalled = new AtomicBoolean(false);
-
- private AutoScaling.TriggerEventProcessor noFirstRunProcessor = event -> {
- fail("Did not expect the listener to fire on first run!");
- return true;
- };
-
- private static int SPEED = 50;
-
- // currentTimeMillis is not as precise so to avoid false positives while comparing time of fire, we add some delta
- private static final long WAIT_FOR_DELTA_NANOS = TimeUnit.MILLISECONDS.toNanos(2);
-
- private static TimeSource timeSource;
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- configureCluster(1, TimeSource.get("simTime:" + SPEED));
- timeSource = cluster.getTimeSource();
- }
-
- @Before
- public void beforeTest() throws Exception {
- actionConstructorCalled = new AtomicBoolean(false);
- actionInitCalled = new AtomicBoolean(false);
- actionCloseCalled = new AtomicBoolean(false);
- }
-
- @Test
- public void testTrigger() throws Exception {
- long waitForSeconds = 1 + random().nextInt(5);
- Map<String, Object> props = createTriggerProps(waitForSeconds);
-
- try (NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.init();
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run();
-
- String newNode1 = cluster.simAddNode();
- String newNode2 = cluster.simAddNode();
- AtomicBoolean fired = new AtomicBoolean(false);
- AtomicReference<TriggerEvent> eventRef = new AtomicReference<>();
- trigger.setProcessor(event -> {
- if (fired.compareAndSet(false, true)) {
- eventRef.set(event);
- long currentTimeNanos = timeSource.getTimeNs();
- long eventTimeNanos = event.getEventTime();
- long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS;
- if (currentTimeNanos - eventTimeNanos <= waitForNanos) {
- fail("NodeAddedListener was fired before the configured waitFor period: currentTimeNanos=" + currentTimeNanos + ", eventTimeNanos=" + eventTimeNanos + ",waitForNanos=" + waitForNanos);
- }
- } else {
- fail("NodeAddedTrigger was fired more than once!");
- }
- return true;
- });
- int counter = 0;
- do {
- trigger.run();
- timeSource.sleep(1000);
- if (counter++ > 10) {
- fail("Newly added node was not discovered by trigger even after 10 seconds");
- }
- } while (!fired.get());
-
- TriggerEvent nodeAddedEvent = eventRef.get();
- assertNotNull(nodeAddedEvent);
- List<String> nodeNames = (List<String>)nodeAddedEvent.getProperty(TriggerEvent.NODE_NAMES);
- assertTrue(nodeNames.contains(newNode1));
- assertTrue(nodeNames.contains(newNode2));
- }
-
- // add a new node but remove it before the waitFor period expires
- // and assert that the trigger doesn't fire at all
- try (NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.init();
- final long waitTime = 2;
- props.put("waitFor", waitTime);
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run();
-
- String newNode = cluster.simAddNode();
- AtomicBoolean fired = new AtomicBoolean(false);
- trigger.setProcessor(event -> {
- if (fired.compareAndSet(false, true)) {
- long currentTimeNanos = timeSource.getTimeNs();
- long eventTimeNanos = event.getEventTime();
- long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS;
- if (currentTimeNanos - eventTimeNanos <= waitForNanos) {
- fail("NodeAddedListener was fired before the configured waitFor period: currentTimeNanos=" + currentTimeNanos + ", eventTimeNanos=" + eventTimeNanos + ",waitForNanos=" + waitForNanos);
- }
- } else {
- fail("NodeAddedTrigger was fired more than once!");
- }
- return true;
- });
- trigger.run(); // first run should detect the new node
- cluster.simRemoveNode(newNode, false);
- int counter = 0;
- do {
- trigger.run();
- timeSource.sleep(1000);
- if (counter++ > waitTime + 1) { // run it a little more than the wait time
- break;
- }
- } while (true);
-
- // ensure the event was not fired
- assertFalse(fired.get());
- }
- }
-
- public void testActionLifecycle() throws Exception {
- Map<String, Object> props = createTriggerProps(0);
- List<Map<String, String>> actions = (List<Map<String, String>>) props.get("actions");
- Map<String, String> action = new HashMap<>(2);
- action.put("name", "testActionInit");
- action.put("class", TestNodeAddedTrigger.AssertInitTriggerAction.class.getName());
- actions.add(action);
- try (NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- assertEquals(true, actionConstructorCalled.get());
- assertEquals(false, actionInitCalled.get());
- assertEquals(false, actionCloseCalled.get());
- trigger.init();
- assertEquals(true, actionInitCalled.get());
- assertEquals(false, actionCloseCalled.get());
- }
- assertEquals(true, actionCloseCalled.get());
- }
-
- public static class AssertInitTriggerAction implements TriggerAction {
- public AssertInitTriggerAction() {
- actionConstructorCalled.set(true);
- }
-
- @Override
- public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, Map<String, Object> properties) throws TriggerValidationException {
-
- }
-
- @Override
- public void init() {
- actionInitCalled.compareAndSet(false, true);
- }
-
- @Override
- public String getName() {
- return "";
- }
-
- @Override
- public void process(TriggerEvent event, ActionContext actionContext) {
-
- }
-
- @Override
- public void close() throws IOException {
- actionCloseCalled.compareAndSet(false, true);
- }
- }
-
- @Test
- public void testListenerAcceptance() throws Exception {
- Map<String, Object> props = createTriggerProps(0);
- try (NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.init();
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run(); // starts tracking live nodes
-
- String newNode = cluster.simAddNode();
- AtomicInteger callCount = new AtomicInteger(0);
- AtomicBoolean fired = new AtomicBoolean(false);
-
- trigger.setProcessor(event -> {
- if (callCount.incrementAndGet() < 2) {
- return false;
- } else {
- fired.compareAndSet(false, true);
- return true;
- }
- });
-
- trigger.run(); // first run should detect the new node and fire immediately but listener isn't ready
- assertEquals(1, callCount.get());
- assertFalse(fired.get());
- trigger.run(); // second run should again fire
- assertEquals(2, callCount.get());
- assertTrue(fired.get());
- trigger.run(); // should not fire
- assertEquals(2, callCount.get());
- }
- }
-
- @Test
- public void testRestoreState() throws Exception {
- long waitForSeconds = 1 + random().nextInt(5);
- Map<String, Object> props = createTriggerProps(waitForSeconds);
-
- // add a new node but update the trigger before the waitFor period expires
- // and assert that the new trigger still fires
- NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger");
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.init();
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run();
-
- String newNode = cluster.simAddNode();
- trigger.run(); // this run should detect the new node
- trigger.close(); // close the old trigger
-
- try (NodeAddedTrigger newTrigger = new NodeAddedTrigger("some_different_name")) {
- newTrigger.configure(cluster.getLoader(), cluster, props);
- trigger.init();
- try {
- newTrigger.restoreState(trigger);
- fail("Trigger should only be able to restore state from an old trigger of the same name");
- } catch (AssertionError e) {
- // expected
- }
- }
-
- try (NodeAddedTrigger newTrigger = new NodeAddedTrigger("node_added_trigger")) {
- newTrigger.configure(cluster.getLoader(), cluster, props);
- newTrigger.init();
- AtomicBoolean fired = new AtomicBoolean(false);
- AtomicReference<TriggerEvent> eventRef = new AtomicReference<>();
- newTrigger.setProcessor(event -> {
- if (fired.compareAndSet(false, true)) {
- eventRef.set(event);
- long currentTimeNanos = timeSource.getTimeNs();
- long eventTimeNanos = event.getEventTime();
- long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS;
- if (currentTimeNanos - eventTimeNanos <= waitForNanos) {
- fail("NodeAddedListener was fired before the configured waitFor period: currentTimeNanos=" + currentTimeNanos + ", eventTimeNanos=" + eventTimeNanos + ",waitForNanos=" + waitForNanos);
- }
- } else {
- fail("NodeAddedTrigger was fired more than once!");
- }
- return true;
- });
- newTrigger.restoreState(trigger); // restore state from the old trigger
- int counter = 0;
- do {
- newTrigger.run();
- timeSource.sleep(1000);
- if (counter++ > 10) {
- fail("Newly added node was not discovered by trigger even after 10 seconds");
- }
- } while (!fired.get());
-
- // ensure the event was fired
- assertTrue(fired.get());
- TriggerEvent nodeAddedEvent = eventRef.get();
- assertNotNull(nodeAddedEvent);
- //TODO assertEquals("", newNode.getNodeName(), nodeAddedEvent.getProperty(NodeAddedTrigger.NodeAddedEvent.NODE_NAME));
- }
- }
-
- private Map<String, Object> createTriggerProps(long waitForSeconds) {
- Map<String, Object> props = new HashMap<>();
- props.put("event", "nodeLost");
- props.put("waitFor", waitForSeconds);
- props.put("enabled", true);
- List<Map<String, String>> actions = new ArrayList<>(3);
- Map<String, String> map = new HashMap<>(2);
- map.put("name", "compute_plan");
- map.put("class", "solr.ComputePlanAction");
- actions.add(map);
- map = new HashMap<>(2);
- map.put("name", "execute_plan");
- map.put("class", "solr.ExecutePlanAction");
- actions.add(map);
- props.put("actions", actions);
- return props;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a3f837a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeLostTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeLostTrigger.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeLostTrigger.java
deleted file mode 100644
index 33c2efa..0000000
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestNodeLostTrigger.java
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.cloud.autoscaling.sim;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
-
-import org.apache.solr.client.solrj.cloud.SolrCloudManager;
-import org.apache.solr.cloud.autoscaling.ActionContext;
-import org.apache.solr.cloud.autoscaling.AutoScaling;
-import org.apache.solr.cloud.autoscaling.NodeLostTrigger;
-import org.apache.solr.cloud.autoscaling.TriggerAction;
-import org.apache.solr.cloud.autoscaling.TriggerEvent;
-import org.apache.solr.cloud.autoscaling.TriggerValidationException;
-import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.core.SolrResourceLoader;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Test for {@link NodeLostTrigger}
- */
-public class TestNodeLostTrigger extends SimSolrCloudTestCase {
- private static AtomicBoolean actionConstructorCalled = new AtomicBoolean(false);
- private static AtomicBoolean actionInitCalled = new AtomicBoolean(false);
- private static AtomicBoolean actionCloseCalled = new AtomicBoolean(false);
-
- private AutoScaling.TriggerEventProcessor noFirstRunProcessor = event -> {
- fail("Did not expect the listener to fire on first run!");
- return true;
- };
-
- private static final int SPEED = 50;
- // use the same time source as the trigger
- private static TimeSource timeSource;
- // currentTimeMillis is not as precise so to avoid false positives while comparing time of fire, we add some delta
- private static final long WAIT_FOR_DELTA_NANOS = TimeUnit.MILLISECONDS.toNanos(5);
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- configureCluster(5, TimeSource.get("simTime:" + SPEED));
- timeSource = cluster.getTimeSource();
- }
-
- @Before
- public void beforeTest() throws Exception {
- actionConstructorCalled = new AtomicBoolean(false);
- actionInitCalled = new AtomicBoolean(false);
- actionCloseCalled = new AtomicBoolean(false);
- }
-
- @Test
- public void testTrigger() throws Exception {
- long waitForSeconds = 1 + random().nextInt(5);
- Map<String, Object> props = createTriggerProps(waitForSeconds);
-
- try (NodeLostTrigger trigger = new NodeLostTrigger("node_lost_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run();
- Iterator<String> it = cluster.getLiveNodesSet().get().iterator();
- String lostNodeName1 = it.next();
- String lostNodeName2 = it.next();
- cluster.simRemoveNode(lostNodeName1, false);
- cluster.simRemoveNode(lostNodeName2, false);
- timeSource.sleep(1000);
-
- AtomicBoolean fired = new AtomicBoolean(false);
- AtomicReference<TriggerEvent> eventRef = new AtomicReference<>();
- trigger.setProcessor(event -> {
- if (fired.compareAndSet(false, true)) {
- eventRef.set(event);
- long currentTimeNanos = timeSource.getTimeNs();
- long eventTimeNanos = event.getEventTime();
- long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS;
- if (currentTimeNanos - eventTimeNanos <= waitForNanos) {
- fail("NodeLostListener was fired before the configured waitFor period: currentTimeNanos=" + currentTimeNanos + ", eventTimeNanos=" + eventTimeNanos + ",waitForNanos=" + waitForNanos);
- }
- } else {
- fail("NodeLostListener was fired more than once!");
- }
- return true;
- });
- int counter = 0;
- do {
- trigger.run();
- timeSource.sleep(1000);
- if (counter++ > 10) {
- fail("Lost node was not discovered by trigger even after 10 seconds");
- }
- } while (!fired.get());
-
- TriggerEvent nodeLostEvent = eventRef.get();
- assertNotNull(nodeLostEvent);
- List<String> nodeNames = (List<String>)nodeLostEvent.getProperty(TriggerEvent.NODE_NAMES);
- assertTrue(nodeNames + " doesn't contain " + lostNodeName1, nodeNames.contains(lostNodeName1));
- assertTrue(nodeNames + " doesn't contain " + lostNodeName2, nodeNames.contains(lostNodeName2));
-
- }
-
- // remove a node but add it back before the waitFor period expires
- // and assert that the trigger doesn't fire at all
- try (NodeLostTrigger trigger = new NodeLostTrigger("node_lost_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- final long waitTime = 2;
- props.put("waitFor", waitTime);
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run();
-
- String lostNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
- cluster.simRemoveNode(lostNode, false);
- AtomicBoolean fired = new AtomicBoolean(false);
- trigger.setProcessor(event -> {
- if (fired.compareAndSet(false, true)) {
- long currentTimeNanos = timeSource.getTimeNs();
- long eventTimeNanos = event.getEventTime();
- long waitForNanos = TimeUnit.NANOSECONDS.convert(waitTime, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS;
- if (currentTimeNanos - eventTimeNanos <= waitForNanos) {
- fail("NodeLostListener was fired before the configured waitFor period: currentTimeNanos=" + currentTimeNanos + ", eventTimeNanos=" + eventTimeNanos + ",waitForNanos=" + waitForNanos);
- }
- } else {
- fail("NodeLostListener was fired more than once!");
- }
- return true;
- });
- trigger.run(); // first run should detect the lost node
- int counter = 0;
- do {
- if (cluster.getLiveNodesSet().get().size() == 2) {
- break;
- }
- timeSource.sleep(100);
- if (counter++ > 20) {
- fail("Live nodes not updated!");
- }
- } while (true);
- counter = 0;
- cluster.getSimClusterStateProvider().simRestoreNode(lostNode);
- do {
- trigger.run();
- timeSource.sleep(1000);
- if (counter++ > waitTime + 1) { // run it a little more than the wait time
- break;
- }
- } while (true);
-
- // ensure the event was not fired
- assertFalse(fired.get());
- }
- }
-
- public void testActionLifecycle() throws Exception {
- Map<String, Object> props = createTriggerProps(0);
- List<Map<String, String>> actions = (List<Map<String, String>>) props.get("actions");
- Map<String, String> action = new HashMap<>(2);
- action.put("name", "testActionInit");
- action.put("class", AssertInitTriggerAction.class.getName());
- actions.add(action);
- try (NodeLostTrigger trigger = new NodeLostTrigger("node_added_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- assertEquals(true, actionConstructorCalled.get());
- assertEquals(false, actionInitCalled.get());
- assertEquals(false, actionCloseCalled.get());
- trigger.init();
- assertEquals(true, actionInitCalled.get());
- assertEquals(false, actionCloseCalled.get());
- }
- assertEquals(true, actionCloseCalled.get());
- }
-
- public static class AssertInitTriggerAction implements TriggerAction {
- public AssertInitTriggerAction() {
- actionConstructorCalled.set(true);
- }
-
- @Override
- public void configure(SolrResourceLoader loader, SolrCloudManager cloudManager, Map<String, Object> properties) throws TriggerValidationException {
-
- }
-
- @Override
- public void init() {
- actionInitCalled.compareAndSet(false, true);
- }
-
- @Override
- public String getName() {
- return "";
- }
-
- @Override
- public void process(TriggerEvent event, ActionContext actionContext) {
-
- }
-
- @Override
- public void close() throws IOException {
- actionCloseCalled.compareAndSet(false, true);
- }
- }
-
- @Test
- public void testListenerAcceptance() throws Exception {
- Map<String, Object> props = createTriggerProps(0);
- try (NodeLostTrigger trigger = new NodeLostTrigger("node_added_trigger")) {
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.setProcessor(noFirstRunProcessor);
-
- String newNode = cluster.simAddNode();
-
- trigger.run(); // starts tracking live nodes
-
- // stop the newly created node
- cluster.simRemoveNode(newNode, false);
-
- AtomicInteger callCount = new AtomicInteger(0);
- AtomicBoolean fired = new AtomicBoolean(false);
-
- trigger.setProcessor(event -> {
- if (callCount.incrementAndGet() < 2) {
- return false;
- } else {
- fired.compareAndSet(false, true);
- return true;
- }
- });
-
- trigger.run(); // first run should detect the lost node and fire immediately but listener isn't ready
- assertEquals(1, callCount.get());
- assertFalse(fired.get());
- trigger.run(); // second run should again fire
- assertEquals(2, callCount.get());
- assertTrue(fired.get());
- trigger.run(); // should not fire
- assertEquals(2, callCount.get());
- }
- }
-
- @Test
- public void testRestoreState() throws Exception {
- long waitForSeconds = 1 + random().nextInt(5);
- Map<String, Object> props = createTriggerProps(waitForSeconds);
-
- String newNode = cluster.simAddNode();
-
- // remove a node but update the trigger before the waitFor period expires
- // and assert that the new trigger still fires
-
- NodeLostTrigger trigger = new NodeLostTrigger("node_lost_trigger");
- trigger.configure(cluster.getLoader(), cluster, props);
- trigger.setProcessor(noFirstRunProcessor);
- trigger.run();
-
- // stop the newly created node
- cluster.simRemoveNode(newNode, false);
-
- trigger.run(); // this run should detect the lost node
- trigger.close(); // close the old trigger
-
- try (NodeLostTrigger newTrigger = new NodeLostTrigger("some_different_name")) {
- newTrigger.configure(cluster.getLoader(), cluster, props);
- try {
- newTrigger.restoreState(trigger);
- fail("Trigger should only be able to restore state from an old trigger of the same name");
- } catch (AssertionError e) {
- // expected
- }
- }
-
- try (NodeLostTrigger newTrigger = new NodeLostTrigger("node_lost_trigger")) {
- newTrigger.configure(cluster.getLoader(), cluster, props);
- AtomicBoolean fired = new AtomicBoolean(false);
- AtomicReference<TriggerEvent> eventRef = new AtomicReference<>();
- newTrigger.setProcessor(event -> {
- if (fired.compareAndSet(false, true)) {
- eventRef.set(event);
- long currentTimeNanos = timeSource.getTimeNs();
- long eventTimeNanos = event.getEventTime();
- long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS;
- if (currentTimeNanos - eventTimeNanos <= waitForNanos) {
- fail("NodeLostListener was fired before the configured waitFor period: currentTimeNanos=" + currentTimeNanos + ", eventTimeNanos=" + eventTimeNanos + ",waitForNanos=" + waitForNanos);
- }
- } else {
- fail("NodeLostListener was fired more than once!");
- }
- return true;
- });
- newTrigger.restoreState(trigger); // restore state from the old trigger
- int counter = 0;
- do {
- newTrigger.run();
- timeSource.sleep(1000);
- if (counter++ > 10) {
- fail("Lost node was not discovered by trigger even after 10 seconds");
- }
- } while (!fired.get());
-
- TriggerEvent nodeLostEvent = eventRef.get();
- assertNotNull(nodeLostEvent);
- List<String> nodeNames = (List<String>)nodeLostEvent.getProperty(TriggerEvent.NODE_NAMES);
- assertTrue(nodeNames.contains(newNode));
- }
- }
-
- private Map<String, Object> createTriggerProps(long waitForSeconds) {
- Map<String, Object> props = new HashMap<>();
- props.put("event", "nodeLost");
- props.put("waitFor", waitForSeconds);
- props.put("enabled", true);
- List<Map<String, String>> actions = new ArrayList<>(3);
- Map<String, String> map = new HashMap<>(2);
- map.put("name", "compute_plan");
- map.put("class", "solr.ComputePlanAction");
- actions.add(map);
- map = new HashMap<>(2);
- map.put("name", "execute_plan");
- map.put("class", "solr.ExecutePlanAction");
- actions.add(map);
- props.put("actions", actions);
- return props;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a3f837a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestPolicyCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestPolicyCloud.java
deleted file mode 100644
index 3237639..0000000
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestPolicyCloud.java
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.cloud.autoscaling.sim;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.function.BiConsumer;
-
-import org.apache.lucene.util.Constants;
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrRequest;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
-import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
-import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
-import org.apache.solr.client.solrj.cloud.autoscaling.Row;
-import org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.cloud.CloudTestUtils;
-import org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest;
-import org.apache.solr.common.cloud.DocCollection;
-import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.cloud.ZkStateReader;
-import org.apache.solr.common.cloud.rule.ImplicitSnitch;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.common.util.Utils;
-import org.apache.zookeeper.KeeperException;
-import org.junit.BeforeClass;
-import org.junit.rules.ExpectedException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
-
-public class TestPolicyCloud extends SimSolrCloudTestCase {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- @org.junit.Rule
- public ExpectedException expectedException = ExpectedException.none();
-
- @BeforeClass
- public static void setupCluster() throws Exception {
- configureCluster(5, TimeSource.get("simTime:50"));
- }
-
- public void testDataProviderPerReplicaDetails() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- CollectionAdminRequest.createCollection("perReplicaDataColl", "conf", 1, 5)
- .process(solrClient);
-
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "perReplicaDataColl",
- CloudTestUtils.clusterShape(1, 5, false, true));
- DocCollection coll = getCollectionState("perReplicaDataColl");
- String autoScaleJson = "{" +
- " 'cluster-preferences': [" +
- " { maximize : freedisk , precision: 50}," +
- " { minimize : cores, precision: 2}" +
- " ]," +
- " 'cluster-policy': [" +
- " { replica : '0' , 'nodeRole': 'overseer'}," +
- " { 'replica': '<2', 'shard': '#ANY', 'node': '#ANY'" +
- " }" +
- " ]," +
- " 'policies': {" +
- " 'policy1': [" +
- " { 'replica': '<2', 'shard': '#EACH', 'node': '#ANY'}," +
- " { 'replica': '<2', 'shard': '#EACH', 'sysprop.rack': 'rack1'}" +
- " ]" +
- " }" +
- "}";
- AutoScalingConfig config = new AutoScalingConfig((Map<String, Object>) Utils.fromJSONString(autoScaleJson));
- Policy.Session session = config.getPolicy().createSession(cluster);
-
- AtomicInteger count = new AtomicInteger(0);
- for (Row row : session.getSortedNodes()) {
- row.collectionVsShardVsReplicas.forEach((c, shardVsReplicas) -> shardVsReplicas.forEach((s, replicaInfos) -> {
- for (ReplicaInfo replicaInfo : replicaInfos) {
- if (replicaInfo.getVariables().containsKey(Type.CORE_IDX.tagName)) count.incrementAndGet();
- }
- }));
- }
- assertTrue(count.get() > 0);
-
- CollectionAdminRequest.deleteCollection("perReplicaDataColl").process(solrClient);
-
- }
-
- public void testCreateCollectionAddReplica() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode(random());
-
- int port = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.PORT);
-
- String commands = "{set-policy :{c1 : [{replica:0 , shard:'#EACH', port: '!" + port + "'}]}}";
- solrClient.request(AutoScalingHandlerTest.createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
-
- String collectionName = "testCreateCollectionAddReplica";
- CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1)
- .setPolicy("c1")
- .process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", collectionName,
- CloudTestUtils.clusterShape(1, 1, false, true));
-
- getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
-
- CollectionAdminRequest.addReplicaToShard(collectionName, "shard1").process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timed out waiting to see 2 replicas for collection: " + collectionName,
- collectionName, (liveNodes, collectionState) -> collectionState.getReplicas().size() == 2);
-
- getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
- }
-
- public void testCreateCollectionSplitShard() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String firstNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
- int firstNodePort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(firstNode, ImplicitSnitch.PORT);
-
- String secondNode;
- int secondNodePort;
- while (true) {
- secondNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
- secondNodePort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(secondNode, ImplicitSnitch.PORT);
- if (secondNodePort != firstNodePort) break;
- }
-
- String commands = "{set-policy :{c1 : [{replica:1 , shard:'#EACH', port: '" + firstNodePort + "'}, {replica:1, shard:'#EACH', port:'" + secondNodePort + "'}]}}";
- NamedList<Object> response = solrClient.request(AutoScalingHandlerTest.createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
- assertEquals("success", response.get("result"));
-
- String collectionName = "testCreateCollectionSplitShard";
- CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
- .setPolicy("c1")
- .process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", collectionName,
- CloudTestUtils.clusterShape(1, 2, false, true));
-
- DocCollection docCollection = getCollectionState(collectionName);
- List<Replica> list = docCollection.getReplicas(firstNode);
- int replicasOnNode1 = list != null ? list.size() : 0;
- list = docCollection.getReplicas(secondNode);
- int replicasOnNode2 = list != null ? list.size() : 0;
-
- assertEquals("Expected exactly one replica of collection on node with port: " + firstNodePort, 1, replicasOnNode1);
- assertEquals("Expected exactly one replica of collection on node with port: " + secondNodePort, 1, replicasOnNode2);
-
- CollectionAdminRequest.splitShard(collectionName).setShardName("shard1").process(solrClient);
-
- CloudTestUtils.waitForState(cluster, "Timed out waiting to see 6 replicas for collection: " + collectionName,
- collectionName, (liveNodes, collectionState) -> collectionState.getReplicas().size() == 6);
-
- docCollection = getCollectionState(collectionName);
- list = docCollection.getReplicas(firstNode);
- replicasOnNode1 = list != null ? list.size() : 0;
- list = docCollection.getReplicas(secondNode);
- replicasOnNode2 = list != null ? list.size() : 0;
-
- assertEquals("Expected exactly three replica of collection on node with port: " + firstNodePort, 3, replicasOnNode1);
- assertEquals("Expected exactly three replica of collection on node with port: " + secondNodePort, 3, replicasOnNode2);
- CollectionAdminRequest.deleteCollection(collectionName).process(solrClient);
-
- }
-
- public void testMetricsTag() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String setClusterPolicyCommand = "{" +
- " 'set-cluster-policy': [" +
- " {'cores':'<10', 'node':'#ANY'}," +
- " {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}," +
- " {'metrics:abc':'overseer', 'replica':0}" +
- " ]" +
- "}";
- SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
- try {
- solrClient.request(req);
- fail("expected exception");
- } catch (Exception e) {
- // expected
- assertTrue(e.toString().contains("Invalid metrics: param in"));
- }
- setClusterPolicyCommand = "{" +
- " 'set-cluster-policy': [" +
- " {'cores':'<10', 'node':'#ANY'}," +
- " {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}," +
- " {'metrics:solr.node:ADMIN./admin/authorization.clientErrors:count':'>58768765', 'replica':0}" +
- " ]" +
- "}";
- req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPolicyCommand);
- solrClient.request(req);
-
- //org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses
- CollectionAdminRequest.createCollection("metricsTest", "conf", 1, 1)
- .process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "metricsTest",
- CloudTestUtils.clusterShape(1, 1));
-
- DocCollection collection = getCollectionState("metricsTest");
- List<String> tags = Arrays.asList("metrics:solr.node:ADMIN./admin/authorization.clientErrors:count",
- "metrics:solr.jvm:buffers.direct.Count");
- Map<String, Object> val = cluster.getNodeStateProvider().getNodeValues(collection.getReplicas().get(0).getNodeName(), tags);
- for (String tag : tags) {
- assertNotNull( "missing : "+ tag , val.get(tag));
- }
-
-
- }
-
- public void testCreateCollectionAddShardWithReplicaTypeUsingPolicy() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- List<String> nodes = new ArrayList<>(cluster.getClusterStateProvider().getLiveNodes());
- String nrtNodeName = nodes.get(0);
- int nrtPort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(nrtNodeName, ImplicitSnitch.PORT);
-
-
- String pullNodeName = nodes.get(1);
- int pullPort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(pullNodeName, ImplicitSnitch.PORT);
-
- String tlogNodeName = nodes.get(1);
- int tlogPort = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(tlogNodeName, ImplicitSnitch.PORT);
- log.info("NRT {} PULL {} , TLOG {} ", nrtNodeName, pullNodeName, tlogNodeName);
-
- String commands = "{set-cluster-policy :[" +
- "{replica:0 , shard:'#EACH', type: NRT, port: '!" + nrtPort + "'}" +
- "{replica:0 , shard:'#EACH', type: PULL, port: '!" + pullPort + "'}" +
- "{replica:0 , shard:'#EACH', type: TLOG, port: '!" + tlogPort + "'}" +
- "]}";
-
-
- solrClient.request(AutoScalingHandlerTest.createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
- Map<String, Object> json = Utils.getJson(cluster.getDistribStateManager(), ZkStateReader.SOLR_AUTOSCALING_CONF_PATH);
- assertEquals("full json:" + Utils.toJSONString(json), "!" + nrtPort,
- Utils.getObjectByPath(json, true, "cluster-policy[0]/port"));
- assertEquals("full json:" + Utils.toJSONString(json), "!" + pullPort,
- Utils.getObjectByPath(json, true, "cluster-policy[1]/port"));
- assertEquals("full json:" + Utils.toJSONString(json), "!" + tlogPort,
- Utils.getObjectByPath(json, true, "cluster-policy[2]/port"));
-
- CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "s1", 1, 1, 1)
- .process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
- CloudTestUtils.clusterShape(1, 3, false, true));
-
- DocCollection coll = getCollectionState("policiesTest");
-
-
- BiConsumer<String, Replica> verifyReplicas = (s, replica) -> {
- switch (replica.getType()) {
- case NRT: {
- assertTrue("NRT replica should be in " + nrtNodeName, replica.getNodeName().equals(nrtNodeName));
- break;
- }
- case TLOG: {
- assertTrue("TLOG replica should be in " + tlogNodeName, replica.getNodeName().equals(tlogNodeName));
- break;
- }
- case PULL: {
- assertTrue("PULL replica should be in " + pullNodeName, replica.getNodeName().equals(pullNodeName));
- break;
- }
- }
-
- };
- coll.forEachReplica(verifyReplicas);
-
- CollectionAdminRequest.createShard("policiesTest", "s3").
- process(solrClient);
- coll = getCollectionState("policiesTest");
- assertEquals(3, coll.getSlice("s3").getReplicas().size());
- coll.forEachReplica(verifyReplicas);
- }
-
- public void testCreateCollectionAddShardUsingPolicy() throws Exception {
- SolrClient solrClient = cluster.simGetSolrClient();
- String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode(random());
- int port = (Integer)cluster.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.PORT);
-
- String commands = "{set-policy :{c1 : [{replica:1 , shard:'#EACH', port: '" + port + "'}]}}";
- solrClient.request(AutoScalingHandlerTest.createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
- Map<String, Object> json = Utils.getJson(cluster.getDistribStateManager(), ZkStateReader.SOLR_AUTOSCALING_CONF_PATH);
- assertEquals("full json:"+ Utils.toJSONString(json) , "#EACH",
- Utils.getObjectByPath(json, true, "/policies/c1[0]/shard"));
- CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "s1,s2", 1)
- .setPolicy("c1")
- .process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
- CloudTestUtils.clusterShape(2, 1));
-
- DocCollection coll = getCollectionState("policiesTest");
- assertEquals("c1", coll.getPolicyName());
- assertEquals(2,coll.getReplicas().size());
- coll.forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
- CollectionAdminRequest.createShard("policiesTest", "s3").process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
- CloudTestUtils.clusterShape(3, 1));
-
- coll = getCollectionState("policiesTest");
- assertEquals(1, coll.getSlice("s3").getReplicas().size());
- coll.getSlice("s3").forEach(replica -> assertEquals(nodeId, replica.getNodeName()));
- }
-
- public void testDataProvider() throws IOException, SolrServerException, KeeperException, InterruptedException {
- SolrClient solrClient = cluster.simGetSolrClient();
- CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "shard1", 2)
- .process(solrClient);
- CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
- CloudTestUtils.clusterShape(1, 2, false, true));
- DocCollection rulesCollection = getCollectionState("policiesTest");
-
- Map<String, Object> val = cluster.getNodeStateProvider().getNodeValues(rulesCollection.getReplicas().get(0).getNodeName(), Arrays.asList(
- "freedisk",
- "cores",
- "heapUsage",
- "sysLoadAvg"));
- assertNotNull(val.get("freedisk"));
- assertNotNull(val.get("heapUsage"));
- assertNotNull(val.get("sysLoadAvg"));
- assertTrue(((Number) val.get("cores")).intValue() > 0);
- assertTrue("freedisk value is " + ((Number) val.get("freedisk")).doubleValue(), Double.compare(((Number) val.get("freedisk")).doubleValue(), 0.0d) > 0);
- assertTrue("heapUsage value is " + ((Number) val.get("heapUsage")).doubleValue(), Double.compare(((Number) val.get("heapUsage")).doubleValue(), 0.0d) > 0);
- if (!Constants.WINDOWS) {
- // the system load average metrics is not available on windows platform
- assertTrue("sysLoadAvg value is " + ((Number) val.get("sysLoadAvg")).doubleValue(), Double.compare(((Number) val.get("sysLoadAvg")).doubleValue(), 0.0d) > 0);
- }
- // simulator doesn't have Overseer, so just pick a random node
- String overseerNode = cluster.getSimClusterStateProvider().simGetRandomNode(random());
- solrClient.request(CollectionAdminRequest.addRole(overseerNode, "overseer"));
- for (int i = 0; i < 10; i++) {
- Map<String, Object> data = Utils.getJson(cluster.getDistribStateManager(), ZkStateReader.ROLES);
- if (i >= 9 && data.isEmpty()) {
- throw new RuntimeException("NO overseer node created");
- }
- cluster.getTimeSource().sleep(100);
- }
- val = cluster.getNodeStateProvider().getNodeValues(overseerNode, Arrays.asList(
- "nodeRole",
- "ip_1", "ip_2", "ip_3", "ip_4",
- "sysprop.java.version",
- "sysprop.java.vendor"));
- assertEquals("overseer", val.get("nodeRole"));
- assertNotNull(val.get("ip_1"));
- assertNotNull(val.get("ip_2"));
- assertNotNull(val.get("ip_3"));
- assertNotNull(val.get("ip_4"));
- assertNotNull(val.get("sysprop.java.version"));
- assertNotNull(val.get("sysprop.java.vendor"));
- }
-}