You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by tf...@apache.org on 2017/05/03 20:26:27 UTC

[3/4] lucene-solr:jira/solr-10233: Minor improvements to ChaosMonkey tests

Minor improvements to ChaosMonkey tests


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/688f12df
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/688f12df
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/688f12df

Branch: refs/heads/jira/solr-10233
Commit: 688f12df4bb2d6588f43a5a67c9752a116b888aa
Parents: e9f3b3a
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Wed May 3 11:23:31 2017 -0700
Committer: Tomas Fernandez Lobbe <tf...@apache.org>
Committed: Wed May 3 11:23:31 2017 -0700

----------------------------------------------------------------------
 ...keyNothingIsSafeWithPassiveReplicasTest.java | 22 +++++++++++++++++---
 .../FullThrottleStoppableIndexingThread.java    | 12 ++++++-----
 .../java/org/apache/solr/SolrTestCaseJ4.java    | 17 ++++++++-------
 .../cloud/AbstractFullDistribZkTestBase.java    | 16 +++++++++-----
 .../java/org/apache/solr/cloud/ChaosMonkey.java |  2 +-
 5 files changed, 48 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/688f12df/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.java
index f75294b..db45392 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.java
@@ -28,12 +28,14 @@ import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.TimeOut;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -97,8 +99,8 @@ public class ChaosMonkeyNothingIsSafeWithPassiveReplicasTest extends AbstractFul
   
   public ChaosMonkeyNothingIsSafeWithPassiveReplicasTest() {
     super();
-    numPassiveReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;;
-    numRealtimeOrAppendReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;;
+    numPassiveReplicas = random().nextInt(TEST_NIGHTLY ? 2 : 1) + 1;
+    numRealtimeOrAppendReplicas = random().nextInt(TEST_NIGHTLY ? 4 : 3) + 1;
     sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
     if (sliceCount == -1) {
       sliceCount = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
@@ -211,7 +213,7 @@ public class ChaosMonkeyNothingIsSafeWithPassiveReplicasTest extends AbstractFul
       
       // try and wait for any replications and what not to finish...
       
-      Thread.sleep(2000);
+      ChaosMonkey.wait(2000, DEFAULT_COLLECTION, zkStateReader);
       
       // wait until there are no recoveries...
       waitForThingsToLevelOut(Integer.MAX_VALUE);//Math.round((runLength / 1000.0f / 3.0f)));
@@ -239,6 +241,7 @@ public class ChaosMonkeyNothingIsSafeWithPassiveReplicasTest extends AbstractFul
       }
       
       waitForReplicationFromReplicas(DEFAULT_COLLECTION, zkStateReader, new TimeOut(30, TimeUnit.SECONDS));
+      waitForAllWarmingSearchers();
       
       Set<String> addFails = getAddFails(indexTreads);
       Set<String> deleteFails = getDeleteFails(indexTreads);
@@ -285,10 +288,22 @@ public class ChaosMonkeyNothingIsSafeWithPassiveReplicasTest extends AbstractFul
     }
   }
 
+  private void waitForAllWarmingSearchers() throws InterruptedException {
+    // Wait for searcher warming on each core of every running Jetty; Jettys that are not running are skipped.
+    for (JettySolrRunner runner : jettys) {
+      if (runner.isRunning()) {
+        for (SolrCore solrCore : runner.getCoreContainer().getCores()) {
+          waitForWarming(solrCore);
+        }
+      }
+    }
+  }
+
   private Set<String> getAddFails(List<StoppableIndexingThread> threads) {
     Set<String> addFails = new HashSet<String>();
     for (StoppableIndexingThread thread : threads)   {
       addFails.addAll(thread.getAddFails());
+//      addFails.addAll(thread.getAddFailsMinRf());
     }
     return addFails;
   }
@@ -297,6 +312,7 @@ public class ChaosMonkeyNothingIsSafeWithPassiveReplicasTest extends AbstractFul
     Set<String> deleteFails = new HashSet<String>();
     for (StoppableIndexingThread thread : threads)   {
       deleteFails.addAll(thread.getDeleteFails());
+//      deleteFails.addAll(thread.getDeleteFailsMinRf());
     }
     return deleteFails;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/688f12df/solr/core/src/test/org/apache/solr/cloud/FullThrottleStoppableIndexingThread.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/FullThrottleStoppableIndexingThread.java b/solr/core/src/test/org/apache/solr/cloud/FullThrottleStoppableIndexingThread.java
index e12a75e..b9e177a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/FullThrottleStoppableIndexingThread.java
+++ b/solr/core/src/test/org/apache/solr/cloud/FullThrottleStoppableIndexingThread.java
@@ -24,7 +24,9 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.http.client.HttpClient;
 import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
@@ -46,7 +48,7 @@ class FullThrottleStoppableIndexingThread extends StoppableIndexingThread {
   private List<SolrClient> clients;
   private AtomicInteger fails = new AtomicInteger();
   
-  public FullThrottleStoppableIndexingThread(SolrClient controlClient, SolrClient cloudClient, List<SolrClient> clients,
+  public FullThrottleStoppableIndexingThread(SolrClient controlClient, CloudSolrClient cloudClient, List<SolrClient> clients,
                                              String id, boolean doDeletes, int clientSoTimeout) {
     super(controlClient, cloudClient, id, doDeletes);
     setName("FullThrottleStopableIndexingThread");
@@ -68,7 +70,7 @@ class FullThrottleStoppableIndexingThread extends StoppableIndexingThread {
       String id = this.id + "-" + i;
       ++i;
       
-      if (doDeletes && ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.random().nextBoolean() && deletes.size() > 0) {
+      if (doDeletes && LuceneTestCase.random().nextBoolean() && deletes.size() > 0) {
         String delete = deletes.remove(0);
         try {
           numDeletes++;
@@ -81,7 +83,7 @@ class FullThrottleStoppableIndexingThread extends StoppableIndexingThread {
       
       try {
         numAdds++;
-        if (numAdds > (ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.TEST_NIGHTLY ? 4002 : 197))
+        if (numAdds > (LuceneTestCase.TEST_NIGHTLY ? 4002 : 197))
           continue;
         SolrInputDocument doc = AbstractFullDistribZkTestBase.getDoc(
             "id",
@@ -96,13 +98,13 @@ class FullThrottleStoppableIndexingThread extends StoppableIndexingThread {
         fails.incrementAndGet();
       }
       
-      if (doDeletes && ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.random().nextBoolean()) {
+      if (doDeletes && LuceneTestCase.random().nextBoolean()) {
         deletes.add(id);
       }
       
     }
 
-    ChaosMonkeyNothingIsSafeWithPassiveReplicasTest.log.info("FT added docs:" + numAdds + " with " + fails + " fails" + " deletes:" + numDeletes);
+    log.info("FT added docs:" + numAdds + " with " + fails + " fails" + " deletes:" + numDeletes);
   }
 
   private void changeUrlOnError(Exception e) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/688f12df/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
index c94f24c..7e443bb 100644
--- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
@@ -2423,24 +2423,27 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
     }
     return result;
   }
-
-  protected void waitForWarming() throws InterruptedException {
-    RefCounted<SolrIndexSearcher> registeredSearcher = h.getCore().getRegisteredSearcher();
-    RefCounted<SolrIndexSearcher> newestSearcher = h.getCore().getNewestSearcher(false);
-    ;
+  
+  protected static void waitForWarming(SolrCore core) throws InterruptedException { // polls every 50 ms until the registered searcher is the same instance as the newest searcher
+    RefCounted<SolrIndexSearcher> registeredSearcher = core.getRegisteredSearcher(); // may be null (checked in the loop); every non-null handle is decref'd below
+    RefCounted<SolrIndexSearcher> newestSearcher = core.getNewestSearcher(false);
     while (registeredSearcher == null || registeredSearcher.get() != newestSearcher.get()) {
       if (registeredSearcher != null) {
         registeredSearcher.decref();
       }
       newestSearcher.decref();
       Thread.sleep(50);
-      registeredSearcher = h.getCore().getRegisteredSearcher();
-      newestSearcher = h.getCore().getNewestSearcher(false);
+      registeredSearcher = core.getRegisteredSearcher(); // re-read both handles from the given core after each sleep
+      newestSearcher = core.getNewestSearcher(false);
     }
     registeredSearcher.decref();
     newestSearcher.decref();
   }
 
+  protected void waitForWarming() throws InterruptedException {
+    waitForWarming(h.getCore()); // instance variant: delegates to the static overload with the core obtained from h
+  }
+
   @BeforeClass
   public static void chooseMPForMP() throws Exception {
     if (random().nextBoolean()) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/688f12df/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
index f48ac3a..4d36286 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
@@ -2032,6 +2032,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
 
   protected void logReplicaTypesReplicationInfo(String collectionName, ZkStateReader zkStateReader) throws KeeperException, InterruptedException, IOException {
     log.info("## Collecting extra Replica.Type information of the cluster");
+    zkStateReader.updateLiveNodes();
     StringBuilder builder = new StringBuilder();
     zkStateReader.forceUpdateCollection(collectionName);
     DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
@@ -2057,18 +2058,22 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
     for(Slice s:collection.getSlices()) {
       Replica leader = s.getLeader();
       long leaderIndexVersion = -1;
-      while (leaderIndexVersion == -1 && !timeout.hasTimedOut()) {
+      while (!timeout.hasTimedOut()) {
         leaderIndexVersion = getIndexVersion(leader);
-        if (leaderIndexVersion < 0) {
-          Thread.sleep(1000);
+        if (leaderIndexVersion >= 0) {
+          break;
         }
+        Thread.sleep(1000);
+      }
+      if (timeout.hasTimedOut()) {
+        fail("Unable to get leader indexVersion");
       }
       for (Replica passiveReplica:s.getReplicas(EnumSet.of(Replica.Type.PASSIVE,Replica.Type.APPEND))) {
         if (!zkStateReader.getClusterState().liveNodesContain(passiveReplica.getNodeName())) {
           continue;
         }
         while (true) {
-          long replicaIndexVersion = getIndexVersion(passiveReplica);
+          long replicaIndexVersion = getIndexVersion(passiveReplica); 
           if (leaderIndexVersion == replicaIndexVersion) {
             log.debug("Leader replica's version ({}) in sync with replica({}): {} == {}", leader.getName(), passiveReplica.getName(), leaderIndexVersion, replicaIndexVersion);
             break;
@@ -2098,7 +2103,8 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
         QueryResponse response = client.query(params);
         @SuppressWarnings("unchecked")
         List<NamedList<Object>> commits = (List<NamedList<Object>>)response.getResponse().get(ReplicationHandler.CMD_SHOW_COMMITS);
-        return (Long)commits.get(commits.size() - 1).get("indexVersion");
+        NamedList<Object> maxVersionCommit = Collections.max(commits, (a,b)->((Long)a.get("indexVersion")).compareTo((Long)b.get("indexVersion"))); // computed once and reused below
+        return (long) maxVersionCommit.get("indexVersion"); // highest indexVersion among the reported commits, independent of list order
       } catch (SolrServerException e) {
         log.warn("Exception getting version from {}, will return an invalid version to retry.", replica.getName(), e);
         return -1;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/688f12df/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java b/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
index 689f0ee..05a42ee 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
@@ -698,7 +698,7 @@ public class ChaosMonkey {
   public static void wait(long runLength, String collectionName, ZkStateReader zkStateReader) throws InterruptedException {
     TimeOut t = new TimeOut(runLength, TimeUnit.MILLISECONDS);
     while (!t.hasTimedOut()) {
-      Thread.sleep(Math.min(1000, runLength));
+      Thread.sleep(Math.max(1, Math.min(1000, t.timeLeft(TimeUnit.MILLISECONDS)))); // at most 1 s per step; floor of 1 ms because the time left can reach 0 (busy loop) or go negative (Thread.sleep throws) after the hasTimedOut() check
       logCollectionStateSummary(collectionName, zkStateReader);
     }
   }