You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/12/11 09:28:13 UTC

[44/47] lucene-solr:jira/http2: SOLR-13045: Harden TestSimPolicyCloud

SOLR-13045: Harden TestSimPolicyCloud

This commit fixes a race condition in SimClusterStateProvider, fixing
several fails in TestSimPolicyCloud.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f89f109e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f89f109e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f89f109e

Branch: refs/heads/jira/http2
Commit: f89f109ec1d1b9a19034d17345d41d3dd2c34852
Parents: af6e15e
Author: Jason Gerlowski <ge...@apache.org>
Authored: Fri Dec 7 11:07:42 2018 -0500
Committer: Jason Gerlowski <ge...@apache.org>
Committed: Mon Dec 10 17:39:12 2018 -0500

----------------------------------------------------------------------
 .../cloud/autoscaling/sim/SimCloudManager.java  |   2 +-
 .../sim/SimClusterStateProvider.java            | 139 ++++++++++---------
 .../autoscaling/sim/TestSimExtremeIndexing.java |   2 -
 .../autoscaling/sim/TestSimPolicyCloud.java     |   6 +-
 4 files changed, 75 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
index dad9463..fd622c1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimCloudManager.java
@@ -838,7 +838,7 @@ public class SimCloudManager implements SolrCloudManager {
           results.add("success", "");
           break;
         case ADDROLE:
-          nodeStateProvider.simAddNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
+          nodeStateProvider.simSetNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
           break;
         case CREATESHARD:
           try {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index d81d92c..6197c41 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -691,79 +691,84 @@ public class SimClusterStateProvider implements ClusterStateProvider {
 
   private void simRunLeaderElection(String collection, Slice s, boolean saveState) throws Exception {
     AtomicBoolean stateChanged = new AtomicBoolean(Boolean.FALSE);
-    Replica leader = s.getLeader();
-    if (leader == null || !liveNodes.contains(leader.getNodeName())) {
-      log.trace("Running leader election for {} / {}", collection, s.getName());
-      if (s.getReplicas().isEmpty()) { // no replicas - punt
-        log.trace("-- no replicas in {} / {}", collection, s.getName());
-        return;
-      }
-      ActionThrottle lt = getThrottle(collection, s.getName());
-      synchronized (lt) {
-        // collect all active and live
-        List<ReplicaInfo> active = new ArrayList<>();
-        AtomicBoolean alreadyHasLeader = new AtomicBoolean(false);
-        s.getReplicas().forEach(r -> {
-          // find our ReplicaInfo for this replica
-          ReplicaInfo ri = getReplicaInfo(r);
-          if (ri == null) {
-            throw new IllegalStateException("-- could not find ReplicaInfo for replica " + r);
-          }
-          synchronized (ri) {
-            if (r.isActive(liveNodes.get())) {
-              if (ri.getVariables().get(ZkStateReader.LEADER_PROP) != null) {
-                log.trace("-- found existing leader {} / {}: {}, {}", collection, s.getName(), ri, r);
-                alreadyHasLeader.set(true);
-                return;
-              } else {
-                active.add(ri);
-              }
-            } else { // if it's on a node that is not live mark it down
-              log.trace("-- replica not active on live nodes: {}, {}", liveNodes.get(), r);
-              if (!liveNodes.contains(r.getNodeName())) {
-                ri.getVariables().put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
-                ri.getVariables().remove(ZkStateReader.LEADER_PROP);
-                stateChanged.set(true);
+    lock.lockInterruptibly();
+    try {
+      Replica leader = s.getLeader();
+      if (leader == null || !liveNodes.contains(leader.getNodeName())) {
+        log.trace("Running leader election for {} / {}", collection, s.getName());
+        if (s.getReplicas().isEmpty()) { // no replicas - punt
+          log.trace("-- no replicas in {} / {}", collection, s.getName());
+          return;
+        }
+        ActionThrottle lt = getThrottle(collection, s.getName());
+        synchronized (lt) {
+          // collect all active and live
+          List<ReplicaInfo> active = new ArrayList<>();
+          AtomicBoolean alreadyHasLeader = new AtomicBoolean(false);
+          s.getReplicas().forEach(r -> {
+            // find our ReplicaInfo for this replica
+            ReplicaInfo ri = getReplicaInfo(r);
+            if (ri == null) {
+              throw new IllegalStateException("-- could not find ReplicaInfo for replica " + r);
+            }
+            synchronized (ri) {
+              if (r.isActive(liveNodes.get())) {
+                if (ri.getVariables().get(ZkStateReader.LEADER_PROP) != null) {
+                  log.trace("-- found existing leader {} / {}: {}, {}", collection, s.getName(), ri, r);
+                  alreadyHasLeader.set(true);
+                  return;
+                } else {
+                  active.add(ri);
+                }
+              } else { // if it's on a node that is not live mark it down
+                log.trace("-- replica not active on live nodes: {}, {}", liveNodes.get(), r);
+                if (!liveNodes.contains(r.getNodeName())) {
+                  ri.getVariables().put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
+                  ri.getVariables().remove(ZkStateReader.LEADER_PROP);
+                  stateChanged.set(true);
+                }
               }
             }
+          });
+          if (alreadyHasLeader.get()) {
+            log.trace("-- already has leader {} / {}: {}", collection, s.getName(), s);
+            return;
           }
-        });
-        if (alreadyHasLeader.get()) {
-          log.trace("-- already has leader {} / {}: {}", collection, s.getName(), s);
-          return;
-        }
-        if (active.isEmpty()) {
-          log.warn("Can't find any active replicas for {} / {}: {}", collection, s.getName(), s);
-          log.debug("-- liveNodes: {}", liveNodes.get());
-          return;
-        }
-        // pick first active one
-        ReplicaInfo ri = null;
-        for (ReplicaInfo a : active) {
-          if (!a.getType().equals(Replica.Type.PULL)) {
-            ri = a;
-            break;
+          if (active.isEmpty()) {
+            log.warn("Can't find any active replicas for {} / {}: {}", collection, s.getName(), s);
+            log.debug("-- liveNodes: {}", liveNodes.get());
+            return;
           }
+          // pick first active one
+          ReplicaInfo ri = null;
+          for (ReplicaInfo a : active) {
+            if (!a.getType().equals(Replica.Type.PULL)) {
+              ri = a;
+              break;
+            }
+          }
+          if (ri == null) {
+            log.warn("-- can't find any suitable replica type for {} / {}: {}", collection, s.getName(), s);
+            return;
+          }
+          // now mark the leader election throttle
+          lt.minimumWaitBetweenActions();
+          lt.markAttemptingAction();
+          synchronized (ri) {
+            ri.getVariables().put(ZkStateReader.LEADER_PROP, "true");
+          }
+          log.debug("-- elected new leader for {} / {} (currentVersion={}): {}", collection,
+              s.getName(), clusterStateVersion, ri);
+          stateChanged.set(true);
         }
-        if (ri == null) {
-          log.warn("-- can't find any suitable replica type for {} / {}: {}", collection, s.getName(), s);
-          return;
-        }
-        // now mark the leader election throttle
-        lt.minimumWaitBetweenActions();
-        lt.markAttemptingAction();
-        synchronized (ri) {
-          ri.getVariables().put(ZkStateReader.LEADER_PROP, "true");
-        }
-        log.debug("-- elected new leader for {} / {} (currentVersion={}): {}", collection,
-            s.getName(), clusterStateVersion, ri);
-        stateChanged.set(true);
+      } else {
+        log.trace("-- already has leader for {} / {}", collection, s.getName());
       }
-    } else {
-      log.trace("-- already has leader for {} / {}", collection, s.getName());
-    }
-    if (stateChanged.get() || saveState) {
-      collectionsStatesRef.set(null);
+    } finally {
+      if (stateChanged.get() || saveState) {
+        collectionsStatesRef.set(null);
+      }
+      lock.unlock();
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
index 6c5f9e6..15d676b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimExtremeIndexing.java
@@ -23,7 +23,6 @@ import java.util.concurrent.TimeUnit;
 
 import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
 
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -52,7 +51,6 @@ import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAut
 @TimeoutSuite(millis = 48 * 3600 * 1000)
 @LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=DEBUG;org.apache.solr.cloud.autoscaling.ScheduledTriggers=DEBUG")
 //@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.CloudTestUtils=TRACE")
-@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // this test can fail to elect a leader, seems to be common among sim tests
 public class TestSimExtremeIndexing extends SimSolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f89f109e/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
index e70cefb..eb251010 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimPolicyCloud.java
@@ -108,7 +108,6 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
 
   }
 
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
   public void testCreateCollectionAddReplica() throws Exception  {
     SolrClient solrClient = cluster.simGetSolrClient();
     String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();
@@ -134,8 +133,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
 
     getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
   }
-  
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+
   public void testCreateCollectionSplitShard() throws Exception  {
     SolrClient solrClient = cluster.simGetSolrClient();
     String firstNode = cluster.getSimClusterStateProvider().simGetRandomNode();
@@ -294,7 +292,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
     assertEquals(3, coll.getSlice("s3").getReplicas().size());
     coll.forEachReplica(verifyReplicas);
   }
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
+  
   public void testCreateCollectionAddShardUsingPolicy() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();