You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2020/10/20 13:20:21 UTC

[lucene-solr] branch branch_8x updated: SOLR-14948: Add more optional debugging info and a unit test.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 13fda69  SOLR-14948: Add more optional debugging info and a unit test.
13fda69 is described below

commit 13fda69305ed342a19d03ac99703dcf1d19e42f6
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Tue Oct 20 15:19:52 2020 +0200

    SOLR-14948: Add more optional debugging info and a unit test.
---
 .../solr/cloud/autoscaling/ComputePlanAction.java  | 35 +++++++++--
 .../cloud/autoscaling/ComputePlanActionTest.java   | 71 +++++++++++++++++-----
 2 files changed, 85 insertions(+), 21 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index 1f6728a..b76ddb0 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -53,6 +53,8 @@ import static org.apache.solr.cloud.autoscaling.TriggerEvent.NODE_NAMES;
 public class ComputePlanAction extends TriggerActionBase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
+  public static final String DIAGNOSTICS = "__compute_diag__";
+
   // accept all collections by default
   Predicate<String> collectionsPredicate = s -> true;
 
@@ -132,6 +134,9 @@ public class ComputePlanAction extends TriggerActionBase {
           log.debug("-- adjusting limit due to explicitly requested number of ops={}", requestedOperations);
           opLimit = requestedOperations;
         }
+        addDiagnostics(event, "maxOperations", maxOperations);
+        addDiagnostics(event, "requestedOperations", requestedOperations);
+        addDiagnostics(event, "opLimit", opLimit);
         do {
           // computing changes in large clusters may take a long time
           if (Thread.currentThread().isInterrupted()) {
@@ -158,6 +163,7 @@ public class ComputePlanAction extends TriggerActionBase {
               //uncomment the following to log zero operations
 //              PolicyHelper.logState(cloudManager, initialSuggester);
               log.debug("-- no more operations suggested, stopping after {} ops...", (opCount - 1));
+              addDiagnostics(event, "noSuggestionsStopAfter", (opCount - 1));
               break;
             } else {
               log.info("Computed plan empty, remained {} requested ops to try.", opCount - opLimit);
@@ -175,10 +181,11 @@ public class ComputePlanAction extends TriggerActionBase {
             operations.add(operation);
             return operations;
           });
-          if (opCount > opLimit) {
+          if (opCount >= opLimit) {
             log.debug("-- reached limit of maxOps={}, stopping.", opLimit);
+            addDiagnostics(event, "opLimitReached", true);
           }
-        } while (opCount <= opLimit);
+        } while (opCount < opLimit);
       } finally {
         releasePolicySession(sessionWrapper, session);
       }
@@ -194,6 +201,14 @@ public class ComputePlanAction extends TriggerActionBase {
 
   }
 
+  private void addDiagnostics(TriggerEvent event, String key, Object value) {
+    if (log.isDebugEnabled()) {
+      Map<String, Object> diag = (Map<String, Object>) event.getProperties()
+          .computeIfAbsent(DIAGNOSTICS, n -> new HashMap<>());
+      diag.put(key, value);
+    }
+  }
+
   protected int getMaxNumOps(TriggerEvent event, AutoScalingConfig autoScalingConfig, ClusterState clusterState) {
     // estimate a maximum default limit that should be sufficient for most purposes:
     // number of nodes * total number of replicas * 3
@@ -210,6 +225,7 @@ public class ComputePlanAction extends TriggerActionBase {
       totalRF.addAndGet(rf * coll.getSlices().size());
     });
     int totalMax = clusterState.getLiveNodes().size() * totalRF.get() * 3;
+    addDiagnostics(event, "estimatedMaxOps", totalMax);
     int maxOp = ((Number) autoScalingConfig.getProperties().getOrDefault(AutoScalingParams.MAX_COMPUTE_OPERATIONS, totalMax)).intValue();
     Object o = event.getProperty(AutoScalingParams.MAX_COMPUTE_OPERATIONS, maxOp);
     if (o != null) {
@@ -219,8 +235,11 @@ public class ComputePlanAction extends TriggerActionBase {
         log.warn("Invalid '{}' event property: {}, using default {}", AutoScalingParams.MAX_COMPUTE_OPERATIONS, o, maxOp);
       }
     }
-    // try at least one operation
-    if (maxOp < 1) {
+    if (maxOp < 0) {
+      // unlimited
+      maxOp = Integer.MAX_VALUE;
+    } else if (maxOp < 1) {
+      // try at least one operation
       log.debug("-- estimated maxOp={}, resetting to 1...", maxOp);
       maxOp = 1;
     }
@@ -291,13 +310,17 @@ public class ComputePlanAction extends TriggerActionBase {
       case MOVEREPLICA:
         Suggester s = session.getSuggester(action)
                 .hint(Suggester.Hint.SRC_NODE, event.getProperty(NODE_NAMES));
-        if (applyCollectionHints(cloudManager, s) == 0) return NoneSuggester.get(session);
+        if (applyCollectionHints(cloudManager, s) == 0) {
+          addDiagnostics(event, "noRelevantCollections", true);
+          return NoneSuggester.get(session);
+        }
         return s;
       case DELETENODE:
         int start = (Integer)event.getProperty(START, 0);
         @SuppressWarnings({"unchecked"})
         List<String> srcNodes = (List<String>) event.getProperty(NODE_NAMES);
         if (srcNodes.isEmpty() || start >= srcNodes.size()) {
+          addDiagnostics(event, "noSourceNodes", true);
           return NoneSuggester.get(session);
         }
         String sourceNode = srcNodes.get(start);
@@ -305,6 +328,7 @@ public class ComputePlanAction extends TriggerActionBase {
                 .hint(Suggester.Hint.SRC_NODE, event.getProperty(NODE_NAMES));
         if (applyCollectionHints(cloudManager, s) == 0) {
           log.debug("-- no relevant collections on {}, no operations computed.", srcNodes);
+          addDiagnostics(event, "noRelevantCollections", true);
           return NoneSuggester.get(session);
         }
         s.hint(Suggester.Hint.SRC_NODE, Collections.singletonList(sourceNode));
@@ -359,6 +383,7 @@ public class ComputePlanAction extends TriggerActionBase {
                   }
                 });
         log.debug("-- NODE_ADDED: ADDREPLICA suggester configured with {} collection/shard hints.", collShards.size());
+        addDiagnostics(event, "relevantCollShard", collShards);
         suggester.hint(Suggester.Hint.COLL_SHARD, collShards);
         suggester.hint(Suggester.Hint.REPLICATYPE, replicaType);
         break;
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
index 2526292..e7a317d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
@@ -583,7 +583,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     int numShards = 1;
     int numCollections = 5;
 
-    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections);
+    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, null);
   }
 
   @Test
@@ -592,7 +592,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     int numShards = 1;
     int numCollections = 5;
 
-    nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(collectionNamePrefix, numShards, numCollections);
+    nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(collectionNamePrefix, numShards, numCollections, null);
   }
 
   @Test
@@ -602,9 +602,19 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     int numShards = 2;
     int numCollections = 5;
 
-    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections);
+    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, null);
   }
-  private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections) throws Exception {
+
+  @Test
+  public void testNodeAddedTriggerWithAddReplicaPreferredOp_2Shard_OpLimit() throws Exception {
+    String collectionNamePrefix = "testNodeAddedTriggerWithAddReplicaPreferredOp_2Shard";
+    int numShards = 2;
+    int numCollections = 5;
+
+    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, 1);
+  }
+
+  private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, Integer maxOps) throws Exception {
     String setTriggerCommand = "{" +
         "'set-trigger' : {" +
         "'name' : 'node_added_trigger'," +
@@ -624,10 +634,10 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
         "    ]" +
         "}";
 
-    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand);
+    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps);
   }
 
-  private void nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(String collectionNamePrefix, int numShards, int numCollections) throws Exception {
+  private void nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(String collectionNamePrefix, int numShards, int numCollections, Integer maxOps) throws Exception {
     String setTriggerCommand = "{" +
         "'set-trigger' : {" +
         "'name' : 'node_added_trigger'," +
@@ -648,13 +658,15 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
         "    ]" +
         "}";
 
-    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, 0, 1, 0);
+    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps, 0, 1, 0);
   }
 
-  private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand) throws Exception {
-    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, 1, null, null);
+  private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand, Integer maxOps) throws Exception {
+    nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps, 1, null, null);
   }
-  private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand, Integer nNrtReplicas, Integer nTlogReplicas, Integer nPullReplicas) throws Exception {
+  private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand,
+                                                         Integer maxOps,
+                                                         Integer nNrtReplicas, Integer nTlogReplicas, Integer nPullReplicas) throws Exception {
     CloudSolrClient solrClient = cluster.getSolrClient();
     @SuppressWarnings({"rawtypes"})
     SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
@@ -665,6 +677,16 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     response = solrClient.request(req);
     assertEquals(response.get("result").toString(), "success");
 
+    if (maxOps != null) {
+      String setMaxOpsCommand = "{" +
+          " 'set-properties': {" +
+          "   'maxComputeOperations': " + maxOps +
+          "  }" +
+          "}";
+      req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setMaxOpsCommand);
+      response = solrClient.request(req);
+      assertEquals(response.get("result").toString(), "success");
+    }
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionNamePrefix + "_0",
         "conf", numShards, nNrtReplicas, nTlogReplicas, nPullReplicas).setMaxShardsPerNode(2);
@@ -683,7 +705,13 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     @SuppressWarnings({"rawtypes"})
     List operations = (List) actionContext.get("operations");
     assertNotNull(operations);
-    assertEquals(numShards, operations.size());
+    int numExpectedOps;
+    if (maxOps != null && maxOps > 0) {
+      numExpectedOps = maxOps;
+    } else {
+      numExpectedOps = numShards;
+    }
+    assertEquals(numExpectedOps, operations.size());
     Set<String> affectedShards = new HashSet<>(2);
     for (Object operation : operations) {
       assertTrue(operation instanceof CollectionAdminRequest.AddReplica);
@@ -692,7 +720,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
       assertEquals(collectionNamePrefix + "_0", addReplica.getCollection());
       affectedShards.add(addReplica.getShard());
     }
-    assertEquals(numShards, affectedShards.size());
+    assertEquals(numExpectedOps, affectedShards.size());
 
     for (int i = 1; i < numCollections; i++) {
       create = CollectionAdminRequest.createCollection(collectionNamePrefix + "_" + i,
@@ -712,7 +740,12 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     actionContext = actionContextPropsRef.get();
     operations = (List) actionContext.get("operations");
     assertNotNull(operations);
-    assertEquals(numCollections * numShards, operations.size());
+    if (maxOps != null && maxOps > 0) {
+      numExpectedOps = maxOps;
+    } else {
+      numExpectedOps = numCollections * numShards;
+    }
+    assertEquals(numExpectedOps, operations.size());
     Set<String> affectedCollections = new HashSet<>(numCollections);
     affectedShards = new HashSet<>(numShards);
     Set<Pair<String, String>> affectedCollShards = new HashSet<>(numCollections * numShards);
@@ -724,9 +757,15 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
       affectedShards.add(addReplica.getShard());
       affectedCollShards.add(new Pair<>(addReplica.getCollection(), addReplica.getShard()));
     }
-    assertEquals(numCollections, affectedCollections.size());
-    assertEquals(numShards, affectedShards.size());
-    assertEquals(numCollections * numShards, affectedCollShards.size());
+    if (maxOps != null && maxOps > 0) {
+      assertEquals(numExpectedOps, affectedCollections.size());
+      assertEquals(numExpectedOps, affectedShards.size());
+      assertEquals(numExpectedOps, affectedCollShards.size());
+    } else {
+      assertEquals(numCollections, affectedCollections.size());
+      assertEquals(numShards, affectedShards.size());
+      assertEquals(numCollections * numShards, affectedCollShards.size());
+    }
   }
 
   @Test