You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/01/23 13:28:23 UTC
[51/51] lucene-solr:jira/solr-11714: SOLR-11714: AddReplicaSuggester
/ ComputePlanAction infinite loop
SOLR-11714: AddReplicaSuggester / ComputePlanAction infinite loop
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e671f58f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e671f58f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e671f58f
Branch: refs/heads/jira/solr-11714
Commit: e671f58fb7ad455c365d3a2cbf30e24abe295607
Parents: bf6ac14
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Tue Jan 23 14:27:15 2018 +0100
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Tue Jan 23 14:27:15 2018 +0100
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../autoscaling/AutoAddReplicasPlanAction.java | 4 +-
.../cloud/autoscaling/ComputePlanAction.java | 9 ++--
.../cloud/autoscaling/sim/TestLargeCluster.java | 29 ++++++++++--
.../cloud/autoscaling/AddReplicaSuggester.java | 46 ++++++++++----------
.../solrj/cloud/autoscaling/NoneSuggester.java | 7 ++-
.../solrj/cloud/autoscaling/PolicyHelper.java | 2 +-
7 files changed, 62 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7f63679..ddb92b7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -118,6 +118,8 @@ Bug Fixes
* SOLR-11794: PULL replicas stop replicating after collection RELOAD (Samuel Tatipamula, Tomás Fernández Löbbe)
+* SOLR-11714: AddReplicaSuggester / ComputePlanAction infinite loop. (ab)
+
Optimizations
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
index 011bf9f..e051ccb 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanAction.java
@@ -38,7 +38,7 @@ public class AutoAddReplicasPlanAction extends ComputePlanAction {
ClusterStateProvider stateProvider = cloudManager.getClusterStateProvider();
String autoAddReplicas = stateProvider.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null);
if (autoAddReplicas != null && autoAddReplicas.equals("false")) {
- return new NoneSuggester();
+ return NoneSuggester.get(session);
}
Suggester suggester = super.getSuggester(session, event, cloudManager);
@@ -57,7 +57,7 @@ public class AutoAddReplicasPlanAction extends ComputePlanAction {
}
}
- if (!anyCollections) return new NoneSuggester();
+ if (!anyCollections) return NoneSuggester.get(session);
return suggester;
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index dcc22eb..e913440 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -36,12 +36,9 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.params.AutoScalingParams;
import org.apache.solr.common.params.CollectionParams;
-import org.apache.solr.common.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.solr.common.params.AutoScalingParams.PREFERRED_OP;
-
/**
* This class is responsible for using the configured policy and preferences
* with the hints provided by the trigger event to compute the required cluster operations.
@@ -85,7 +82,9 @@ public class ComputePlanAction extends TriggerActionBase {
SolrRequest operation = suggester.getSuggestion();
opCount++;
// prepare suggester for the next iteration
- session = suggester.getSession();
+ if (suggester.getSession() != null) {
+ session = suggester.getSession();
+ }
suggester = getSuggester(session, event, cloudManager);
// break on first null op
@@ -165,7 +164,7 @@ public class ComputePlanAction extends TriggerActionBase {
List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)event.getProperty(TriggerEvent.REQUESTED_OPS, Collections.emptyList());
int start = (Integer)event.getProperty(START, 0);
if (ops.isEmpty() || start >= ops.size()) {
- return NoneSuggester.INSTANCE;
+ return NoneSuggester.get(session);
}
TriggerEvent.Op op = ops.get(start);
suggester = session.getSuggester(op.getAction());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
index e9c686b..523076a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestLargeCluster.java
@@ -36,7 +36,9 @@ import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.autoscaling.ActionContext;
import org.apache.solr.cloud.autoscaling.ComputePlanAction;
@@ -49,6 +51,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.util.LogLevel;
import org.junit.Before;
@@ -490,7 +493,7 @@ public class TestLargeCluster extends SimSolrCloudTestCase {
}
@Test
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11714")
+ //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11714")
public void testSearchRate() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String setTriggerCommand = "{" +
@@ -529,9 +532,10 @@ public class TestLargeCluster extends SimSolrCloudTestCase {
log.info("Ready after " + waitForState(collectionName, 300, TimeUnit.SECONDS, clusterShape(2, 10)) + " ms");
- // collect the node names
+ // collect the node names for shard1
Set<String> nodes = new HashSet<>();
cluster.getSimClusterStateProvider().getClusterState().getCollection(collectionName)
+ .getSlice("shard1")
.getReplicas()
.forEach(r -> nodes.add(r.getNodeName()));
@@ -539,11 +543,28 @@ public class TestLargeCluster extends SimSolrCloudTestCase {
// simulate search traffic
cluster.getSimClusterStateProvider().simSetShardValue(collectionName, "shard1", metricName, 40, true);
-// boolean await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
-// assertTrue("The trigger did not fire at all", await);
+ boolean await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
+ assertTrue("The trigger did not fire at all", await);
// wait for listener to capture the SUCCEEDED stage
cluster.getTimeSource().sleep(2000);
assertEquals(listenerEvents.toString(), 1, listenerEvents.get("srt").size());
CapturedEvent ev = listenerEvents.get("srt").get(0);
+ assertEquals(TriggerEventType.SEARCHRATE, ev.event.getEventType());
+ Map<String, Number> m = (Map<String, Number>)ev.event.getProperty("node");
+ assertNotNull(m);
+ assertEquals(nodes.size(), m.size());
+ assertEquals(nodes, m.keySet());
+ m.forEach((k, v) -> assertEquals(4.0, v.doubleValue(), 0.01));
+ List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+ assertNotNull(ops);
+ assertEquals(3, ops.size());
+ ops.forEach(op -> {
+ assertEquals(CollectionParams.CollectionAction.ADDREPLICA, op.getAction());
+ assertEquals(1, op.getHints().size());
+ Pair<String, String> hint = (Pair<String, String>)op.getHints().get(Suggester.Hint.COLL_SHARD);
+ assertNotNull(hint);
+ assertEquals(collectionName, hint.first());
+ assertEquals("shard1", hint.second());
+ });
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
index 5ebf761..3f96f3e 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AddReplicaSuggester.java
@@ -41,35 +41,33 @@ class AddReplicaSuggester extends Suggester {
Set<Pair<String, String>> shards = (Set<Pair<String, String>>) hints.getOrDefault(Hint.COLL_SHARD, Collections.emptySet());
if (shards.isEmpty()) {
throw new RuntimeException("add-replica requires 'collection' and 'shard'");
- } else if (shards.size() > 1) {
- throw new RuntimeException("add-replica requires exactly one hint of type " + Hint.COLL_SHARD);
}
- Pair<String, String> shard = shards.iterator().next();
- Replica.Type type = Replica.Type.get((String) hints.get(Hint.REPLICATYPE));
- //iterate through elements and identify the least loaded
- List<Violation> leastSeriousViolation = null;
- Integer targetNodeIndex = null;
- for (int i = getMatrix().size() - 1; i >= 0; i--) {
- Row row = getMatrix().get(i);
- if (!row.isLive) continue;
- if (!isAllowed(row.node, Hint.TARGET_NODE)) continue;
- Row tmpRow = row.addReplica(shard.first(), shard.second(), type);
+ for (Pair<String,String> shard : shards) {
+ Replica.Type type = Replica.Type.get((String) hints.get(Hint.REPLICATYPE));
+ //iterate through elements and identify the least loaded
+ List<Violation> leastSeriousViolation = null;
+ Integer targetNodeIndex = null;
+ for (int i = getMatrix().size() - 1; i >= 0; i--) {
+ Row row = getMatrix().get(i);
+ if (!isNodeSuitable(row)) continue;
+ Row tmpRow = row.addReplica(shard.first(), shard.second(), type);
- List<Violation> errs = testChangedMatrix(strict, getModifiedMatrix(getMatrix(), tmpRow, i));
- if (!containsNewErrors(errs)) {
- if (isLessSerious(errs, leastSeriousViolation)) {
- leastSeriousViolation = errs;
- targetNodeIndex = i;
+ List<Violation> errs = testChangedMatrix(strict, getModifiedMatrix(getMatrix(), tmpRow, i));
+ if (!containsNewErrors(errs)) {
+ if (isLessSerious(errs, leastSeriousViolation)) {
+ leastSeriousViolation = errs;
+ targetNodeIndex = i;
+ }
}
}
- }
- if (targetNodeIndex != null) {// there are no rule violations
- getMatrix().set(targetNodeIndex, getMatrix().get(targetNodeIndex).addReplica(shard.first(), shard.second(), type));
- return CollectionAdminRequest
- .addReplicaToShard(shard.first(), shard.second())
- .setType(type)
- .setNode(getMatrix().get(targetNodeIndex).node);
+ if (targetNodeIndex != null) {// there are no rule violations
+ getMatrix().set(targetNodeIndex, getMatrix().get(targetNodeIndex).addReplica(shard.first(), shard.second(), type));
+ return CollectionAdminRequest
+ .addReplicaToShard(shard.first(), shard.second())
+ .setType(type)
+ .setNode(getMatrix().get(targetNodeIndex).node);
+ }
}
return null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
index 1a18f26..2f6c369 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/NoneSuggester.java
@@ -20,7 +20,12 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
import org.apache.solr.client.solrj.SolrRequest;
public class NoneSuggester extends Suggester {
- public static final NoneSuggester INSTANCE = new NoneSuggester();
+
+ public static NoneSuggester get(Policy.Session session) {
+ NoneSuggester suggester = new NoneSuggester();
+ suggester._init(session);
+ return suggester;
+ }
@Override
SolrRequest init() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e671f58f/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
index ae7c9af..a67a4fa 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java
@@ -273,7 +273,7 @@ public class PolicyHelper {
*
*/
private void returnSession(SessionWrapper sessionWrapper) {
- TimeSource timeSource = sessionWrapper.session.cloudManager.getTimeSource();
+ TimeSource timeSource = sessionWrapper.session != null ? sessionWrapper.session.cloudManager.getTimeSource() : TimeSource.NANO_TIME;
synchronized (lockObj) {
sessionWrapper.status = Status.EXECUTING;
log.info("returnSession, curr-time {} sessionWrapper.createTime {}, this.sessionWrapper.createTime {} ", time(timeSource, MILLISECONDS),