You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/01/23 13:28:23 UTC

[51/51] lucene-solr:jira/solr-11714: SOLR-11714: AddReplicaSuggester / ComputePlanAction infinite loop

SOLR-11714: AddReplicaSuggester  / ComputePlanAction infinite loop


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e671f58f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e671f58f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e671f58f

Branch: refs/heads/jira/solr-11714
Commit: e671f58fb7ad455c365d3a2cbf30e24abe295607
Parents: bf6ac14
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Tue Jan 23 14:27:15 2018 +0100
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Tue Jan 23 14:27:15 2018 +0100

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../autoscaling/AutoAddReplicasPlanAction.java  |  4 +-
 .../cloud/autoscaling/ComputePlanAction.java    |  9 ++--
 .../cloud/autoscaling/sim/TestLargeCluster.java | 29 ++++++++++--
 .../cloud/autoscaling/AddReplicaSuggester.java  | 46 ++++++++++----------
 .../solrj/cloud/autoscaling/NoneSuggester.java  |  7 ++-
 .../solrj/cloud/autoscaling/PolicyHelper.java   |  2 +-
 7 files changed, 62 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7f63679..ddb92b7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -118,6 +118,8 @@ Bug Fixes
 
 * SOLR-11794: PULL replicas stop replicating after collection RELOAD (Samuel Tatipamula, Tomás Fernández Löbbe)
 
+* SOLR-11714: AddReplicaSuggester  / ComputePlanAction infinite loop. (ab)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
index 011bf9f..e051ccb 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
@@ -38,7 +38,7 @@ public class AutoAddReplicasPlanAction extends ComputePlanAction {
     ClusterStateProvider stateProvider = cloudManager.getClusterStateProvider();
     String autoAddReplicas = stateProvider.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null);
     if (autoAddReplicas != null && autoAddReplicas.equals("false")) {
-      return new NoneSuggester();
+      return NoneSuggester.get(session);
     }
 
     Suggester suggester = super.getSuggester(session, event, cloudManager);
@@ -57,7 +57,7 @@ public class AutoAddReplicasPlanAction extends ComputePlanAction {
       }
     }
 
-    if (!anyCollections) return new NoneSuggester();
+    if (!anyCollections) return NoneSuggester.get(session);
     return suggester;
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index dcc22eb..e913440 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -36,12 +36,9 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.params.AutoScalingParams;
 import org.apache.solr.common.params.CollectionParams;
-import org.apache.solr.common.util.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.solr.common.params.AutoScalingParams.PREFERRED_OP;
-
 /**
  * This class is responsible for using the configured policy and preferences
  * with the hints provided by the trigger event to compute the required cluster operations.
@@ -85,7 +82,9 @@ public class ComputePlanAction extends TriggerActionBase {
           SolrRequest operation = suggester.getSuggestion();
           opCount++;
           // prepare suggester for the next iteration
-          session = suggester.getSession();
+          if (suggester.getSession() != null) {
+            session = suggester.getSession();
+          }
           suggester = getSuggester(session, event, cloudManager);
 
           // break on first null op
@@ -165,7 +164,7 @@ public class ComputePlanAction extends TriggerActionBase {
         List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)event.getProperty(TriggerEvent.REQUESTED_OPS, Collections.emptyList());
         int start = (Integer)event.getProperty(START, 0);
         if (ops.isEmpty() || start >= ops.size()) {
-          return NoneSuggester.INSTANCE;
+          return NoneSuggester.get(session);
         }
         TriggerEvent.Op op = ops.get(start);
         suggester = session.getSuggester(op.getAction());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
index e9c686b..523076a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
@@ -36,7 +36,9 @@ import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.cloud.autoscaling.ActionContext;
 import org.apache.solr.cloud.autoscaling.ComputePlanAction;
@@ -49,6 +51,7 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Pair;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.util.LogLevel;
 import org.junit.Before;
@@ -490,7 +493,7 @@ public class TestLargeCluster extends SimSolrCloudTestCase {
   }
 
   @Test
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11714")
+  //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11714")
   public void testSearchRate() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     String setTriggerCommand = "{" +
@@ -529,9 +532,10 @@ public class TestLargeCluster extends SimSolrCloudTestCase {
 
     log.info("Ready after " + waitForState(collectionName, 300, TimeUnit.SECONDS, clusterShape(2, 10)) + " ms");
 
-    // collect the node names
+    // collect the node names for shard1
     Set<String> nodes = new HashSet<>();
     cluster.getSimClusterStateProvider().getClusterState().getCollection(collectionName)
+        .getSlice("shard1")
         .getReplicas()
         .forEach(r -> nodes.add(r.getNodeName()));
 
@@ -539,11 +543,28 @@ public class TestLargeCluster extends SimSolrCloudTestCase {
     // simulate search traffic
     cluster.getSimClusterStateProvider().simSetShardValue(collectionName, "shard1", metricName, 40, true);
 
-//    boolean await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
-//    assertTrue("The trigger did not fire at all", await);
+    boolean await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
+    assertTrue("The trigger did not fire at all", await);
     // wait for listener to capture the SUCCEEDED stage
     cluster.getTimeSource().sleep(2000);
     assertEquals(listenerEvents.toString(), 1, listenerEvents.get("srt").size());
     CapturedEvent ev = listenerEvents.get("srt").get(0);
+    assertEquals(TriggerEventType.SEARCHRATE, ev.event.getEventType());
+    Map<String, Number> m = (Map<String, Number>)ev.event.getProperty("node");
+    assertNotNull(m);
+    assertEquals(nodes.size(), m.size());
+    assertEquals(nodes, m.keySet());
+    m.forEach((k, v) -> assertEquals(4.0, v.doubleValue(), 0.01));
+    List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+    assertNotNull(ops);
+    assertEquals(3, ops.size());
+    ops.forEach(op -> {
+      assertEquals(CollectionParams.CollectionAction.ADDREPLICA, op.getAction());
+      assertEquals(1, op.getHints().size());
+      Pair<String, String> hint = (Pair<String, String>)op.getHints().get(Suggester.Hint.COLL_SHARD);
+      assertNotNull(hint);
+      assertEquals(collectionName, hint.first());
+      assertEquals("shard1", hint.second());
+    });
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
index 5ebf761..3f96f3e 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
@@ -41,35 +41,33 @@ class AddReplicaSuggester extends Suggester {
     Set<Pair<String, String>> shards = (Set<Pair<String, String>>) hints.getOrDefault(Hint.COLL_SHARD, Collections.emptySet());
     if (shards.isEmpty()) {
       throw new RuntimeException("add-replica requires 'collection' and 'shard'");
-    } else if (shards.size() > 1) {
-      throw new RuntimeException("add-replica requires exactly one hint of type " + Hint.COLL_SHARD);
     }
-    Pair<String, String> shard = shards.iterator().next();
-    Replica.Type type = Replica.Type.get((String) hints.get(Hint.REPLICATYPE));
-    //iterate through elements and identify the least loaded
-    List<Violation> leastSeriousViolation = null;
-    Integer targetNodeIndex = null;
-    for (int i = getMatrix().size() - 1; i >= 0; i--) {
-      Row row = getMatrix().get(i);
-      if (!row.isLive) continue;
-      if (!isAllowed(row.node, Hint.TARGET_NODE)) continue;
-      Row tmpRow = row.addReplica(shard.first(), shard.second(), type);
+    for (Pair<String,String> shard : shards) {
+      Replica.Type type = Replica.Type.get((String) hints.get(Hint.REPLICATYPE));
+      //iterate through elements and identify the least loaded
+      List<Violation> leastSeriousViolation = null;
+      Integer targetNodeIndex = null;
+      for (int i = getMatrix().size() - 1; i >= 0; i--) {
+        Row row = getMatrix().get(i);
+        if (!isNodeSuitable(row)) continue;
+        Row tmpRow = row.addReplica(shard.first(), shard.second(), type);
 
-      List<Violation> errs = testChangedMatrix(strict, getModifiedMatrix(getMatrix(), tmpRow, i));
-      if (!containsNewErrors(errs)) {
-        if (isLessSerious(errs, leastSeriousViolation)) {
-          leastSeriousViolation = errs;
-          targetNodeIndex = i;
+        List<Violation> errs = testChangedMatrix(strict, getModifiedMatrix(getMatrix(), tmpRow, i));
+        if (!containsNewErrors(errs)) {
+          if (isLessSerious(errs, leastSeriousViolation)) {
+            leastSeriousViolation = errs;
+            targetNodeIndex = i;
+          }
         }
       }
-    }
 
-    if (targetNodeIndex != null) {// there are no rule violations
-      getMatrix().set(targetNodeIndex, getMatrix().get(targetNodeIndex).addReplica(shard.first(), shard.second(), type));
-      return CollectionAdminRequest
-          .addReplicaToShard(shard.first(), shard.second())
-          .setType(type)
-          .setNode(getMatrix().get(targetNodeIndex).node);
+      if (targetNodeIndex != null) {// there are no rule violations
+        getMatrix().set(targetNodeIndex, getMatrix().get(targetNodeIndex).addReplica(shard.first(), shard.second(), type));
+        return CollectionAdminRequest
+            .addReplicaToShard(shard.first(), shard.second())
+            .setType(type)
+            .setNode(getMatrix().get(targetNodeIndex).node);
+      }
     }
 
     return null;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
index 1a18f26..2f6c369 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
@@ -20,7 +20,12 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
 import org.apache.solr.client.solrj.SolrRequest;
 
 public class NoneSuggester extends Suggester {
-  public static final NoneSuggester INSTANCE = new NoneSuggester();
+
+  public static NoneSuggester get(Policy.Session session) {
+    NoneSuggester suggester = new NoneSuggester();
+    suggester._init(session);
+    return suggester;
+  }
 
   @Override
   SolrRequest init() {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
index ae7c9af..a67a4fa 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
@@ -273,7 +273,7 @@ public class PolicyHelper {
      *
      */
     private void returnSession(SessionWrapper sessionWrapper) {
-      TimeSource timeSource = sessionWrapper.session.cloudManager.getTimeSource();
+      TimeSource timeSource = sessionWrapper.session != null ? sessionWrapper.session.cloudManager.getTimeSource() : TimeSource.NANO_TIME;
       synchronized (lockObj) {
         sessionWrapper.status = Status.EXECUTING;
         log.info("returnSession, curr-time {} sessionWrapper.createTime {}, this.sessionWrapper.createTime {} ", time(timeSource, MILLISECONDS),