You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/12/11 09:28:13 UTC
[44/47] lucene-solr:jira/http2: SOLR-13045: Harden TestSimPolicyCloud
SOLR-13045: Harden TestSimPolicyCloud
This commit fixes a race condition in SimClusterStateProvider, fixing
several fails in TestSimPolicyCloud.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f89f109e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f89f109e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f89f109e
Branch: refs/heads/jira/http2
Commit: f89f109ec1d1b9a19034d17345d41d3dd2c34852
Parents: af6e15e
Author: Jason Gerlowski <ge...@apache.org>
Authored: Fri Dec 7 11:07:42 2018 -0500
Committer: Jason Gerlowski <ge...@apache.org>
Committed: Mon Dec 10 17:39:12 2018 -0500
----------------------------------------------------------------------
.../cloud/autoscaling/sim/SimCloudManager.java | 2 +-
.../sim/SimClusterStateProvider.java | 139 ++++++++++---------
.../autoscaling/sim/TestSimExtremeIndexing.java | 2 -
.../autoscaling/sim/TestSimPolicyCloud.java | 6 +-
4 files changed, 75 insertions(+), 74 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index dad9463..fd622c1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -838,7 +838,7 @@ public class SimCloudManager implements SolrCloudManager {
results.add("success", "");
break;
case ADDROLE:
- nodeStateProvider.simAddNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
+ nodeStateProvider.simSetNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
break;
case CREATESHARD:
try {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index d81d92c..6197c41 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -691,79 +691,84 @@ public class SimClusterStateProvider implements ClusterStateProvider {
private void simRunLeaderElection(String collection, Slice s, boolean saveState) throws Exception {
AtomicBoolean stateChanged = new AtomicBoolean(Boolean.FALSE);
- Replica leader = s.getLeader();
- if (leader == null || !liveNodes.contains(leader.getNodeName())) {
- log.trace("Running leader election for {} / {}", collection, s.getName());
- if (s.getReplicas().isEmpty()) { // no replicas - punt
- log.trace("-- no replicas in {} / {}", collection, s.getName());
- return;
- }
- ActionThrottle lt = getThrottle(collection, s.getName());
- synchronized (lt) {
- // collect all active and live
- List<ReplicaInfo> active = new ArrayList<>();
- AtomicBoolean alreadyHasLeader = new AtomicBoolean(false);
- s.getReplicas().forEach(r -> {
- // find our ReplicaInfo for this replica
- ReplicaInfo ri = getReplicaInfo(r);
- if (ri == null) {
- throw new IllegalStateException("-- could not find ReplicaInfo for replica " + r);
- }
- synchronized (ri) {
- if (r.isActive(liveNodes.get())) {
- if (ri.getVariables().get(ZkStateReader.LEADER_PROP) != null) {
- log.trace("-- found existing leader {} / {}: {}, {}", collection, s.getName(), ri, r);
- alreadyHasLeader.set(true);
- return;
- } else {
- active.add(ri);
- }
- } else { // if it's on a node that is not live mark it down
- log.trace("-- replica not active on live nodes: {}, {}", liveNodes.get(), r);
- if (!liveNodes.contains(r.getNodeName())) {
- ri.getVariables().put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
- ri.getVariables().remove(ZkStateReader.LEADER_PROP);
- stateChanged.set(true);
+ lock.lockInterruptibly();
+ try {
+ Replica leader = s.getLeader();
+ if (leader == null || !liveNodes.contains(leader.getNodeName())) {
+ log.trace("Running leader election for {} / {}", collection, s.getName());
+ if (s.getReplicas().isEmpty()) { // no replicas - punt
+ log.trace("-- no replicas in {} / {}", collection, s.getName());
+ return;
+ }
+ ActionThrottle lt = getThrottle(collection, s.getName());
+ synchronized (lt) {
+ // collect all active and live
+ List<ReplicaInfo> active = new ArrayList<>();
+ AtomicBoolean alreadyHasLeader = new AtomicBoolean(false);
+ s.getReplicas().forEach(r -> {
+ // find our ReplicaInfo for this replica
+ ReplicaInfo ri = getReplicaInfo(r);
+ if (ri == null) {
+ throw new IllegalStateException("-- could not find ReplicaInfo for replica " + r);
+ }
+ synchronized (ri) {
+ if (r.isActive(liveNodes.get())) {
+ if (ri.getVariables().get(ZkStateReader.LEADER_PROP) != null) {
+ log.trace("-- found existing leader {} / {}: {}, {}", collection, s.getName(), ri, r);
+ alreadyHasLeader.set(true);
+ return;
+ } else {
+ active.add(ri);
+ }
+ } else { // if it's on a node that is not live mark it down
+ log.trace("-- replica not active on live nodes: {}, {}", liveNodes.get(), r);
+ if (!liveNodes.contains(r.getNodeName())) {
+ ri.getVariables().put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
+ ri.getVariables().remove(ZkStateReader.LEADER_PROP);
+ stateChanged.set(true);
+ }
}
}
+ });
+ if (alreadyHasLeader.get()) {
+ log.trace("-- already has leader {} / {}: {}", collection, s.getName(), s);
+ return;
}
- });
- if (alreadyHasLeader.get()) {
- log.trace("-- already has leader {} / {}: {}", collection, s.getName(), s);
- return;
- }
- if (active.isEmpty()) {
- log.warn("Can't find any active replicas for {} / {}: {}", collection, s.getName(), s);
- log.debug("-- liveNodes: {}", liveNodes.get());
- return;
- }
- // pick first active one
- ReplicaInfo ri = null;
- for (ReplicaInfo a : active) {
- if (!a.getType().equals(Replica.Type.PULL)) {
- ri = a;
- break;
+ if (active.isEmpty()) {
+ log.warn("Can't find any active replicas for {} / {}: {}", collection, s.getName(), s);
+ log.debug("-- liveNodes: {}", liveNodes.get());
+ return;
}
+ // pick first active one
+ ReplicaInfo ri = null;
+ for (ReplicaInfo a : active) {
+ if (!a.getType().equals(Replica.Type.PULL)) {
+ ri = a;
+ break;
+ }
+ }
+ if (ri == null) {
+ log.warn("-- can't find any suitable replica type for {} / {}: {}", collection, s.getName(), s);
+ return;
+ }
+ // now mark the leader election throttle
+ lt.minimumWaitBetweenActions();
+ lt.markAttemptingAction();
+ synchronized (ri) {
+ ri.getVariables().put(ZkStateReader.LEADER_PROP, "true");
+ }
+ log.debug("-- elected new leader for {} / {} (currentVersion={}): {}", collection,
+ s.getName(), clusterStateVersion, ri);
+ stateChanged.set(true);
}
- if (ri == null) {
- log.warn("-- can't find any suitable replica type for {} / {}: {}", collection, s.getName(), s);
- return;
- }
- // now mark the leader election throttle
- lt.minimumWaitBetweenActions();
- lt.markAttemptingAction();
- synchronized (ri) {
- ri.getVariables().put(ZkStateReader.LEADER_PROP, "true");
- }
- log.debug("-- elected new leader for {} / {} (currentVersion={}): {}", collection,
- s.getName(), clusterStateVersion, ri);
- stateChanged.set(true);
+ } else {
+ log.trace("-- already has leader for {} / {}", collection, s.getName());
}
- } else {
- log.trace("-- already has leader for {} / {}", collection, s.getName());
- }
- if (stateChanged.get() || saveState) {
- collectionsStatesRef.set(null);
+ } finally {
+ if (stateChanged.get() || saveState) {
+ collectionsStatesRef.set(null);
+ }
+ lock.unlock();
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
index 6c5f9e6..15d676b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
@@ -23,7 +23,6 @@ import java.util.concurrent.TimeUnit;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -52,7 +51,6 @@ import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAut
@TimeoutSuite(millis = 48 * 3600 * 1000)
@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=DEBUG;org.apache.solr.cloud.autoscaling.ScheduledTriggers=DEBUG")
//@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.CloudTestUtils=TRACE")
-@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // this test can fail to elect a leader, seems to be common among sim tests
public class TestSimExtremeIndexing extends SimSolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
index e70cefb..eb251010 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
@@ -108,7 +108,6 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
}
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
public void testCreateCollectionAddReplica() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();
@@ -134,8 +133,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
}
-
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+
public void testCreateCollectionSplitShard() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String firstNode = cluster.getSimClusterStateProvider().simGetRandomNode();
@@ -294,7 +292,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
assertEquals(3, coll.getSlice("s3").getReplicas().size());
coll.forEachReplica(verifyReplicas);
}
- @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
+
public void testCreateCollectionAddShardUsingPolicy() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();