You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2020/10/20 13:20:21 UTC
[lucene-solr] branch branch_8x updated: SOLR-14948: Add more optional debugging info and a unit test.
This is an automated email from the ASF dual-hosted git repository.
ab pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 13fda69 SOLR-14948: Add more optional debugging info and a unit test.
13fda69 is described below
commit 13fda69305ed342a19d03ac99703dcf1d19e42f6
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Tue Oct 20 15:19:52 2020 +0200
SOLR-14948: Add more optional debugging info and a unit test.
---
.../solr/cloud/autoscaling/ComputePlanAction.java | 35 +++++++++--
.../cloud/autoscaling/ComputePlanActionTest.java | 71 +++++++++++++++++-----
2 files changed, 85 insertions(+), 21 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index 1f6728a..b76ddb0 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -53,6 +53,8 @@ import static org.apache.solr.cloud.autoscaling.TriggerEvent.NODE_NAMES;
public class ComputePlanAction extends TriggerActionBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ public static final String DIAGNOSTICS = "__compute_diag__";
+
// accept all collections by default
Predicate<String> collectionsPredicate = s -> true;
@@ -132,6 +134,9 @@ public class ComputePlanAction extends TriggerActionBase {
log.debug("-- adjusting limit due to explicitly requested number of ops={}", requestedOperations);
opLimit = requestedOperations;
}
+ addDiagnostics(event, "maxOperations", maxOperations);
+ addDiagnostics(event, "requestedOperations", requestedOperations);
+ addDiagnostics(event, "opLimit", opLimit);
do {
// computing changes in large clusters may take a long time
if (Thread.currentThread().isInterrupted()) {
@@ -158,6 +163,7 @@ public class ComputePlanAction extends TriggerActionBase {
//uncomment the following to log zero operations
// PolicyHelper.logState(cloudManager, initialSuggester);
log.debug("-- no more operations suggested, stopping after {} ops...", (opCount - 1));
+ addDiagnostics(event, "noSuggestionsStopAfter", (opCount - 1));
break;
} else {
log.info("Computed plan empty, remained {} requested ops to try.", opCount - opLimit);
@@ -175,10 +181,11 @@ public class ComputePlanAction extends TriggerActionBase {
operations.add(operation);
return operations;
});
- if (opCount > opLimit) {
+ if (opCount >= opLimit) {
log.debug("-- reached limit of maxOps={}, stopping.", opLimit);
+ addDiagnostics(event, "opLimitReached", true);
}
- } while (opCount <= opLimit);
+ } while (opCount < opLimit);
} finally {
releasePolicySession(sessionWrapper, session);
}
@@ -194,6 +201,14 @@ public class ComputePlanAction extends TriggerActionBase {
}
+ private void addDiagnostics(TriggerEvent event, String key, Object value) {
+ if (log.isDebugEnabled()) {
+ Map<String, Object> diag = (Map<String, Object>) event.getProperties()
+ .computeIfAbsent(DIAGNOSTICS, n -> new HashMap<>());
+ diag.put(key, value);
+ }
+ }
+
protected int getMaxNumOps(TriggerEvent event, AutoScalingConfig autoScalingConfig, ClusterState clusterState) {
// estimate a maximum default limit that should be sufficient for most purposes:
// number of nodes * total number of replicas * 3
@@ -210,6 +225,7 @@ public class ComputePlanAction extends TriggerActionBase {
totalRF.addAndGet(rf * coll.getSlices().size());
});
int totalMax = clusterState.getLiveNodes().size() * totalRF.get() * 3;
+ addDiagnostics(event, "estimatedMaxOps", totalMax);
int maxOp = ((Number) autoScalingConfig.getProperties().getOrDefault(AutoScalingParams.MAX_COMPUTE_OPERATIONS, totalMax)).intValue();
Object o = event.getProperty(AutoScalingParams.MAX_COMPUTE_OPERATIONS, maxOp);
if (o != null) {
@@ -219,8 +235,11 @@ public class ComputePlanAction extends TriggerActionBase {
log.warn("Invalid '{}' event property: {}, using default {}", AutoScalingParams.MAX_COMPUTE_OPERATIONS, o, maxOp);
}
}
- // try at least one operation
- if (maxOp < 1) {
+ if (maxOp < 0) {
+ // unlimited
+ maxOp = Integer.MAX_VALUE;
+ } else if (maxOp < 1) {
+ // try at least one operation
log.debug("-- estimated maxOp={}, resetting to 1...", maxOp);
maxOp = 1;
}
@@ -291,13 +310,17 @@ public class ComputePlanAction extends TriggerActionBase {
case MOVEREPLICA:
Suggester s = session.getSuggester(action)
.hint(Suggester.Hint.SRC_NODE, event.getProperty(NODE_NAMES));
- if (applyCollectionHints(cloudManager, s) == 0) return NoneSuggester.get(session);
+ if (applyCollectionHints(cloudManager, s) == 0) {
+ addDiagnostics(event, "noRelevantCollections", true);
+ return NoneSuggester.get(session);
+ }
return s;
case DELETENODE:
int start = (Integer)event.getProperty(START, 0);
@SuppressWarnings({"unchecked"})
List<String> srcNodes = (List<String>) event.getProperty(NODE_NAMES);
if (srcNodes.isEmpty() || start >= srcNodes.size()) {
+ addDiagnostics(event, "noSourceNodes", true);
return NoneSuggester.get(session);
}
String sourceNode = srcNodes.get(start);
@@ -305,6 +328,7 @@ public class ComputePlanAction extends TriggerActionBase {
.hint(Suggester.Hint.SRC_NODE, event.getProperty(NODE_NAMES));
if (applyCollectionHints(cloudManager, s) == 0) {
log.debug("-- no relevant collections on {}, no operations computed.", srcNodes);
+ addDiagnostics(event, "noRelevantCollections", true);
return NoneSuggester.get(session);
}
s.hint(Suggester.Hint.SRC_NODE, Collections.singletonList(sourceNode));
@@ -359,6 +383,7 @@ public class ComputePlanAction extends TriggerActionBase {
}
});
log.debug("-- NODE_ADDED: ADDREPLICA suggester configured with {} collection/shard hints.", collShards.size());
+ addDiagnostics(event, "relevantCollShard", collShards);
suggester.hint(Suggester.Hint.COLL_SHARD, collShards);
suggester.hint(Suggester.Hint.REPLICATYPE, replicaType);
break;
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
index 2526292..e7a317d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
@@ -583,7 +583,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
int numShards = 1;
int numCollections = 5;
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, null);
}
@Test
@@ -592,7 +592,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
int numShards = 1;
int numCollections = 5;
- nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(collectionNamePrefix, numShards, numCollections);
+ nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(collectionNamePrefix, numShards, numCollections, null);
}
@Test
@@ -602,9 +602,19 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
int numShards = 2;
int numCollections = 5;
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, null);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections) throws Exception {
+
+ @Test
+ public void testNodeAddedTriggerWithAddReplicaPreferredOp_2Shard_OpLimit() throws Exception {
+ String collectionNamePrefix = "testNodeAddedTriggerWithAddReplicaPreferredOp_2Shard";
+ int numShards = 2;
+ int numCollections = 5;
+
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, 1);
+ }
+
+ private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, Integer maxOps) throws Exception {
String setTriggerCommand = "{" +
"'set-trigger' : {" +
"'name' : 'node_added_trigger'," +
@@ -624,10 +634,10 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
" ]" +
"}";
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(String collectionNamePrefix, int numShards, int numCollections) throws Exception {
+ private void nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(String collectionNamePrefix, int numShards, int numCollections, Integer maxOps) throws Exception {
String setTriggerCommand = "{" +
"'set-trigger' : {" +
"'name' : 'node_added_trigger'," +
@@ -648,13 +658,15 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
" ]" +
"}";
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, 0, 1, 0);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps, 0, 1, 0);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand) throws Exception {
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, 1, null, null);
+ private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand, Integer maxOps) throws Exception {
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps, 1, null, null);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand, Integer nNrtReplicas, Integer nTlogReplicas, Integer nPullReplicas) throws Exception {
+ private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand,
+ Integer maxOps,
+ Integer nNrtReplicas, Integer nTlogReplicas, Integer nPullReplicas) throws Exception {
CloudSolrClient solrClient = cluster.getSolrClient();
@SuppressWarnings({"rawtypes"})
SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
@@ -665,6 +677,16 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
response = solrClient.request(req);
assertEquals(response.get("result").toString(), "success");
+ if (maxOps != null) {
+ String setMaxOpsCommand = "{" +
+ " 'set-properties': {" +
+ " 'maxComputeOperations': " + maxOps +
+ " }" +
+ "}";
+ req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setMaxOpsCommand);
+ response = solrClient.request(req);
+ assertEquals(response.get("result").toString(), "success");
+ }
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionNamePrefix + "_0",
"conf", numShards, nNrtReplicas, nTlogReplicas, nPullReplicas).setMaxShardsPerNode(2);
@@ -683,7 +705,13 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
@SuppressWarnings({"rawtypes"})
List operations = (List) actionContext.get("operations");
assertNotNull(operations);
- assertEquals(numShards, operations.size());
+ int numExpectedOps;
+ if (maxOps != null && maxOps > 0) {
+ numExpectedOps = maxOps;
+ } else {
+ numExpectedOps = numShards;
+ }
+ assertEquals(numExpectedOps, operations.size());
Set<String> affectedShards = new HashSet<>(2);
for (Object operation : operations) {
assertTrue(operation instanceof CollectionAdminRequest.AddReplica);
@@ -692,7 +720,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
assertEquals(collectionNamePrefix + "_0", addReplica.getCollection());
affectedShards.add(addReplica.getShard());
}
- assertEquals(numShards, affectedShards.size());
+ assertEquals(numExpectedOps, affectedShards.size());
for (int i = 1; i < numCollections; i++) {
create = CollectionAdminRequest.createCollection(collectionNamePrefix + "_" + i,
@@ -712,7 +740,12 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
actionContext = actionContextPropsRef.get();
operations = (List) actionContext.get("operations");
assertNotNull(operations);
- assertEquals(numCollections * numShards, operations.size());
+ if (maxOps != null && maxOps > 0) {
+ numExpectedOps = maxOps;
+ } else {
+ numExpectedOps = numCollections * numShards;
+ }
+ assertEquals(numExpectedOps, operations.size());
Set<String> affectedCollections = new HashSet<>(numCollections);
affectedShards = new HashSet<>(numShards);
Set<Pair<String, String>> affectedCollShards = new HashSet<>(numCollections * numShards);
@@ -724,9 +757,15 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
affectedShards.add(addReplica.getShard());
affectedCollShards.add(new Pair<>(addReplica.getCollection(), addReplica.getShard()));
}
- assertEquals(numCollections, affectedCollections.size());
- assertEquals(numShards, affectedShards.size());
- assertEquals(numCollections * numShards, affectedCollShards.size());
+ if (maxOps != null && maxOps > 0) {
+ assertEquals(numExpectedOps, affectedCollections.size());
+ assertEquals(numExpectedOps, affectedShards.size());
+ assertEquals(numExpectedOps, affectedCollShards.size());
+ } else {
+ assertEquals(numCollections, affectedCollections.size());
+ assertEquals(numShards, affectedShards.size());
+ assertEquals(numCollections * numShards, affectedCollShards.size());
+ }
}
@Test