You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/07/24 07:06:01 UTC

svn commit: r1364890 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/core/ solr/core/src/test/org/apache/solr/cloud/

Author: markrmiller
Date: Tue Jul 24 05:06:00 2012
New Revision: 1364890

URL: http://svn.apache.org/viewvc?rev=1364890&view=rev
Log:
improve zk tests vs blackhole

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Tue Jul 24 05:06:00 2012
@@ -66,18 +66,15 @@ public class ChaosMonkey {
   private boolean expireSessions;
   private boolean causeConnectionLoss;
   private boolean aggressivelyKillLeaders;
-  private Map<String,SolrServer> shardToLeaderClient;
   private Map<String,CloudJettyRunner> shardToLeaderJetty;
   private long startTime;
   
   public ChaosMonkey(ZkTestServer zkServer, ZkStateReader zkStateReader,
       String collection, Map<String,List<CloudJettyRunner>> shardToJetty,
       Map<String,List<SolrServer>> shardToClient,
-      Map<String,SolrServer> shardToLeaderClient,
       Map<String,CloudJettyRunner> shardToLeaderJetty) {
     this.shardToJetty = shardToJetty;
     this.shardToClient = shardToClient;
-    this.shardToLeaderClient = shardToLeaderClient;
     this.shardToLeaderJetty = shardToLeaderJetty;
     this.zkServer = zkServer;
     this.zkStateReader = zkStateReader;
@@ -104,7 +101,7 @@ public class ChaosMonkey {
   public void expireRandomSession() throws KeeperException, InterruptedException {
     String sliceName = getRandomSlice();
     
-    JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders);
+    JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders).jetty;
     if (jetty != null) {
       expireSession(jetty);
       expires.incrementAndGet();
@@ -115,7 +112,7 @@ public class ChaosMonkey {
     monkeyLog("cause connection loss!");
     
     String sliceName = getRandomSlice();
-    JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders);
+    JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders).jetty;
     if (jetty != null) {
       causeConnectionLoss(jetty);
       connloss.incrementAndGet();
@@ -135,23 +132,29 @@ public class ChaosMonkey {
     }
   }
 
-  public JettySolrRunner stopShard(String slice, int index) throws Exception {
-    JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
-    stopJetty(jetty);
-    return jetty;
+  public CloudJettyRunner stopShard(String slice, int index) throws Exception {
+    CloudJettyRunner cjetty = shardToJetty.get(slice).get(index);
+    stopJetty(cjetty);
+    return cjetty;
   }
 
-  public void stopJetty(JettySolrRunner jetty) throws Exception {
-    stop(jetty);
+  public void stopJetty(CloudJettyRunner cjetty) throws Exception {
+    stop(cjetty.jetty);
     stops.incrementAndGet();
   }
 
-  public void killJetty(JettySolrRunner jetty) throws Exception {
-    kill(jetty);
+  public void killJetty(CloudJettyRunner cjetty) throws Exception {
+    kill(cjetty);
     stops.incrementAndGet();
   }
   
-  public static void stop(JettySolrRunner jetty) throws Exception {
+  public void stopJetty(JettySolrRunner jetty) throws Exception {
+    stops.incrementAndGet();
+    stopJettySolrRunner(jetty);
+  }
+  
+  private static void stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
+    
     monkeyLog("stop shard! " + jetty.getLocalPort());
     // get a clean shutdown so that no dirs are left open...
     FilterHolder fh = jetty.getDispatchFilter();
@@ -168,7 +171,8 @@ public class ChaosMonkey {
     }
   }
   
-  public static void kill(JettySolrRunner jetty) throws Exception {
+  public static void kill(CloudJettyRunner cjetty) throws Exception {
+    JettySolrRunner jetty = cjetty.jetty;
     monkeyLog("kill shard! " + jetty.getLocalPort());
     FilterHolder fh = jetty.getDispatchFilter();
     SolrDispatchFilter sdf = null;
@@ -189,7 +193,7 @@ public class ChaosMonkey {
   public void stopShard(String slice) throws Exception {
     List<CloudJettyRunner> jetties = shardToJetty.get(slice);
     for (CloudJettyRunner jetty : jetties) {
-      stopJetty(jetty.jetty);
+      stopJetty(jetty);
     }
   }
   
@@ -197,7 +201,7 @@ public class ChaosMonkey {
     List<CloudJettyRunner> jetties = shardToJetty.get(slice);
     for (CloudJettyRunner jetty : jetties) {
       if (!jetty.nodeName.equals(shardName)) {
-        stopJetty(jetty.jetty);
+        stopJetty(jetty);
       }
     }
   }
@@ -207,22 +211,22 @@ public class ChaosMonkey {
     return jetty;
   }
   
-  public JettySolrRunner stopRandomShard() throws Exception {
+  public CloudJettyRunner stopRandomShard() throws Exception {
     String sliceName = getRandomSlice();
     
     return stopRandomShard(sliceName);
   }
   
-  public JettySolrRunner stopRandomShard(String slice) throws Exception {
-    JettySolrRunner jetty = getRandomJetty(slice, aggressivelyKillLeaders);
-    if (jetty != null) {
-      stopJetty(jetty);
+  public CloudJettyRunner stopRandomShard(String slice) throws Exception {
+    CloudJettyRunner cjetty = getRandomJetty(slice, aggressivelyKillLeaders);
+    if (cjetty != null) {
+      stopJetty(cjetty);
     }
-    return jetty;
+    return cjetty;
   }
   
   
-  public JettySolrRunner killRandomShard() throws Exception {
+  public CloudJettyRunner killRandomShard() throws Exception {
     // add all the shards to a list
     String sliceName = getRandomSlice();
     
@@ -238,15 +242,15 @@ public class ChaosMonkey {
     return sliceName;
   }
   
-  public JettySolrRunner killRandomShard(String slice) throws Exception {
-    JettySolrRunner jetty = getRandomJetty(slice, aggressivelyKillLeaders);
-    if (jetty != null) {
-      killJetty(jetty);
+  public CloudJettyRunner killRandomShard(String slice) throws Exception {
+    CloudJettyRunner cjetty = getRandomJetty(slice, aggressivelyKillLeaders);
+    if (cjetty != null) {
+      killJetty(cjetty);
     }
-    return jetty;
+    return cjetty;
   }
   
-  public JettySolrRunner getRandomJetty(String slice, boolean aggressivelyKillLeaders) throws KeeperException, InterruptedException {
+  public CloudJettyRunner getRandomJetty(String slice, boolean aggressivelyKillLeaders) throws KeeperException, InterruptedException {
     
 
     int numRunning = 0;
@@ -301,15 +305,15 @@ public class ChaosMonkey {
     }
     Random random = LuceneTestCase.random();
     int chance = random.nextInt(10);
-    JettySolrRunner jetty;
+    CloudJettyRunner cjetty;
     if (chance <= 5 && aggressivelyKillLeaders) {
       // if killLeader, really aggressively go after leaders
-      jetty = shardToLeaderJetty.get(slice).jetty;
+      cjetty = shardToLeaderJetty.get(slice);
     } else {
       // get random shard
       List<CloudJettyRunner> jetties = shardToJetty.get(slice);
       int index = random.nextInt(jetties.size());
-      jetty = jetties.get(index).jetty;
+      cjetty = jetties.get(index);
       
       ZkNodeProps leader = zkStateReader.getLeaderProps(collection, slice);
       boolean isLeader = leader.get(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName);
@@ -320,15 +324,16 @@ public class ChaosMonkey {
       } 
     }
 
-    if (jetty.getLocalPort() == -1) {
+    if (cjetty.jetty.getLocalPort() == -1) {
       // we can't kill the dead
       monkeyLog("abort! This guy is already dead");
       return null;
     }
     
     //System.out.println("num active:" + numActive + " for " + slice + " sac:" + jetty.getLocalPort());
-    monkeyLog("chose a victim! " + jetty.getLocalPort());
-    return jetty;
+    monkeyLog("chose a victim! " + cjetty.jetty.getLocalPort());
+  
+    return cjetty;
   }
   
   public SolrServer getRandomClient(String slice) throws KeeperException, InterruptedException {
@@ -353,7 +358,7 @@ public class ChaosMonkey {
     
     stop = false;
     new Thread() {
-      private List<JettySolrRunner> deadPool = new ArrayList<JettySolrRunner>();
+      private List<CloudJettyRunner> deadPool = new ArrayList<CloudJettyRunner>();
 
       @Override
       public void run() {
@@ -364,25 +369,9 @@ public class ChaosMonkey {
             if (random.nextBoolean()) {
              if (!deadPool.isEmpty()) {
                int index = random.nextInt(deadPool.size());
-               JettySolrRunner jetty = deadPool.get(index);
-               try {
-                 jetty.start();
-               } catch (BindException e) {
-                 jetty.stop();
-                 sleep(2000);
-                 try {
-                   jetty.start();
-                 } catch (BindException e2) {
-                   jetty.stop();
-                   sleep(5000);
-                   try {
-                     jetty.start();
-                   } catch (BindException e3) {
-                     // we coud not get the port
-                     jetty.stop();
-                     continue;
-                   }
-                 }
+               JettySolrRunner jetty = deadPool.get(index).jetty;
+               if (!ChaosMonkey.start(jetty)) {
+                 continue;
                }
                //System.out.println("started on port:" + jetty.getLocalPort());
                deadPool.remove(index);
@@ -402,16 +391,16 @@ public class ChaosMonkey {
               randomConnectionLoss();
             }
             
-            JettySolrRunner jetty;
+            CloudJettyRunner cjetty;
             if (random.nextBoolean()) {
-              jetty = stopRandomShard();
+              cjetty = stopRandomShard();
             } else {
-              jetty = killRandomShard();
+              cjetty = killRandomShard();
             }
-            if (jetty == null) {
+            if (cjetty == null) {
               // we cannot kill
             } else {
-              deadPool.add(jetty);
+              deadPool.add(cjetty);
             }
             
           } catch (InterruptedException e) {
@@ -441,4 +430,31 @@ public class ChaosMonkey {
     return starts.get();
   }
 
+  public static void stop(JettySolrRunner jetty) throws Exception {
+    stopJettySolrRunner(jetty);
+  }
+  
+  public static boolean start(JettySolrRunner jetty) throws Exception {
+    try {
+      jetty.start();
+    } catch (BindException e) {
+      jetty.stop();
+      Thread.sleep(2000);
+      try {
+        jetty.start();
+      } catch (BindException e2) {
+        jetty.stop();
+        Thread.sleep(5000);
+        try {
+          jetty.start();
+        } catch (BindException e3) {
+          // we coud not get the port
+          jetty.stop();
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
 }
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java Tue Jul 24 05:06:00 2012
@@ -109,28 +109,30 @@ public class FullSolrCloudTest extends A
   protected Map<String,SolrServer> shardToLeaderClient = new HashMap<String,SolrServer>();
   protected Map<String,CloudJettyRunner> shardToLeaderJetty = new HashMap<String,CloudJettyRunner>();
   
-  class CloudJettyRunner {
+  static class CloudJettyRunner {
     JettySolrRunner jetty;
     String nodeName;
     String coreNodeName;
     String url;
+    CloudSolrServerClient client;
   }
   
   static class CloudSolrServerClient {
-    SolrServer client;
+    SolrServer solrClient;
     String shardName;
+    int port;
     
     public CloudSolrServerClient() {}
     
     public CloudSolrServerClient(SolrServer client) {
-      this.client = client;
+      this.solrClient = client;
     }
     
     @Override
     public int hashCode() {
       final int prime = 31;
       int result = 1;
-      result = prime * result + ((client == null) ? 0 : client.hashCode());
+      result = prime * result + ((solrClient == null) ? 0 : solrClient.hashCode());
       return result;
     }
     
@@ -140,9 +142,9 @@ public class FullSolrCloudTest extends A
       if (obj == null) return false;
       if (getClass() != obj.getClass()) return false;
       CloudSolrServerClient other = (CloudSolrServerClient) obj;
-      if (client == null) {
-        if (other.client != null) return false;
-      } else if (!client.equals(other.client)) return false;
+      if (solrClient == null) {
+        if (other.solrClient != null) return false;
+      } else if (!solrClient.equals(other.solrClient)) return false;
       return true;
     }
     
@@ -189,7 +191,7 @@ public class FullSolrCloudTest extends A
       }
       
       chaosMonkey = new ChaosMonkey(zkServer, zkStateReader,
-          DEFAULT_COLLECTION, shardToJetty, shardToClient, shardToLeaderClient,
+          DEFAULT_COLLECTION, shardToJetty, shardToClient,
           shardToLeaderJetty);
     }
     
@@ -359,7 +361,8 @@ public class FullSolrCloudTest extends A
           
           if (shard.getKey().contains(":" + port + "_")) {
             CloudSolrServerClient csc = new CloudSolrServerClient();
-            csc.client = client;
+            csc.solrClient = client;
+            csc.port = port;
             csc.shardName = shard.getValue().get(ZkStateReader.NODE_NAME_PROP);
             boolean isLeader = shard.getValue().containsKey(
                 ZkStateReader.LEADER_PROP);
@@ -409,6 +412,7 @@ public class FullSolrCloudTest extends A
             cjr.nodeName = shard.getValue().get(ZkStateReader.NODE_NAME_PROP);
             cjr.coreNodeName = shard.getKey();
             cjr.url = shard.getValue().get(ZkStateReader.BASE_URL_PROP) + "/" + shard.getValue().get(ZkStateReader.CORE_NAME_PROP);
+            cjr.client = findClientByPort(port);
             list.add(cjr);
             if (isLeader) {
               shardToLeaderJetty.put(slice.getKey(), cjr);
@@ -431,6 +435,16 @@ public class FullSolrCloudTest extends A
     }
   }
   
+  private CloudSolrServerClient findClientByPort(int port) {
+    Set<CloudSolrServerClient> theClients = clientToInfo.keySet();
+    for (CloudSolrServerClient client : theClients) {
+      if (client.port == port) {
+        return client;
+      }
+    }
+    throw new IllegalArgumentException("Client with the give port does not exist:" + port);
+  }
+
   @Override
   protected void setDistributedParams(ModifiableSolrParams params) {
     
@@ -678,7 +692,7 @@ public class FullSolrCloudTest extends A
     query("q", "*:*", "sort", "n_tl1 desc");
     
     // kill a shard
-    JettySolrRunner deadShard = chaosMonkey.stopShard(SHARD2, 0);
+    CloudJettyRunner deadShard = chaosMonkey.stopShard(SHARD2, 0);
     cloudClient.connect();
     int tries = 0;
     while (cloudClient.getZkStateReader().getCloudState().liveNodesContain(clientToInfo.get(new CloudSolrServerClient(shardToClient.get(SHARD2).get(0))).get(ZkStateReader.NODE_NAME_PROP))) {
@@ -690,7 +704,7 @@ public class FullSolrCloudTest extends A
 	
     // ensure shard is dead
     try {
-      index_specific(shardToClient.get(SHARD2).get(0), id, 999, i1, 107, t1,
+      index_specific(deadShard.client.solrClient, id, 999, i1, 107, t1,
           "specific doc!");
       fail("This server should be down and this update should have failed");
     } catch (SolrServerException e) {
@@ -777,7 +791,7 @@ public class FullSolrCloudTest extends A
     // query("q","matchesnothing","fl","*,score", "debugQuery", "true");
     
     // this should trigger a recovery phase on deadShard
-    deadShard.start(true);
+    ChaosMonkey.start(deadShard.jetty);
     
     // make sure we have published we are recovering
     Thread.sleep(1500);
@@ -791,7 +805,7 @@ public class FullSolrCloudTest extends A
     
     
     // recover over 100 docs so we do more than just peer sync (replicate recovery)
-    deadShard = chaosMonkey.stopShard(SHARD2, 0);
+    chaosMonkey.stopJetty(deadShard);
     
     for (int i = 0; i < 226; i++) {
       doc = new SolrInputDocument();
@@ -804,7 +818,9 @@ public class FullSolrCloudTest extends A
     }
     commit();
     
-    deadShard.start(true);
+    Thread.sleep(1500);
+    
+    ChaosMonkey.start(deadShard.jetty);
     
     // make sure we have published we are recovering
     Thread.sleep(1500);
@@ -1182,7 +1198,7 @@ public class FullSolrCloudTest extends A
   private SolrServer getClient(String nodeName) {
     for (CloudSolrServerClient client : clientToInfo.keySet()) {
       if (client.shardName.equals(nodeName)) {
-        return client.client;
+        return client.solrClient;
       }
     }
     return null;
@@ -1233,7 +1249,8 @@ public class FullSolrCloudTest extends A
       long count = 0;
       String currentState = clientToInfo.get(new CloudSolrServerClient(client))
           .get(ZkStateReader.STATE_PROP);
-      if (currentState != null && currentState.equals(ZkStateReader.ACTIVE)) {
+      if (currentState != null && currentState.equals(ZkStateReader.ACTIVE) && zkStateReader.getCloudState().liveNodesContain(clientToInfo.get(new CloudSolrServerClient(client))
+          .get(ZkStateReader.NODE_NAME_PROP))) {
         SolrQuery query = new SolrQuery("*:*");
         query.set("distrib", false);
         count = client.query(query).getResults().getNumFound();

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java Tue Jul 24 05:06:00 2012
@@ -72,7 +72,7 @@ public class RecoveryZkTest extends Full
     Thread.sleep(atLeast(2000));   
     
     // bring shard replica down
-    JettySolrRunner replica = chaosMonkey.stopShard("shard1", 1);
+    JettySolrRunner replica = chaosMonkey.stopShard("shard1", 1).jetty;
 
     
     // wait a moment - lets allow some docs to be indexed so replication time is non 0

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java Tue Jul 24 05:06:00 2012
@@ -32,6 +32,7 @@ import org.apache.solr.client.solrj.embe
 import org.apache.solr.client.solrj.impl.HttpSolrServer;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.cloud.FullSolrCloudTest.CloudSolrServerClient;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams.CollectionAction;
@@ -143,8 +144,8 @@ public class SyncSliceTest extends FullS
         "to come to the aid of their country.");
     
     // kill the leader - new leader could have all the docs or be missing one
-    JettySolrRunner leaderJetty = shardToLeaderJetty.get("shard1").jetty;
-    SolrServer leaderClient = shardToLeaderClient.get("shard1");
+    CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1");
+    CloudSolrServerClient leaderClient = leaderJetty.client;
     Set<JettySolrRunner> jetties = new HashSet<JettySolrRunner>();
     for (int i = 0; i < shardCount; i++) {
       jetties.add(shardToJetty.get("shard1").get(i).jetty);
@@ -158,7 +159,7 @@ public class SyncSliceTest extends FullS
     // because on some systems (especially freebsd w/ blackhole enabled), trying
     // to talk to a downed node causes grief
     int tries = 0;
-    while (((SolrDispatchFilter) upJetty.getDispatchFilter().getFilter()).getCores().getZkController().getZkStateReader().getCloudState().liveNodesContain(clientToInfo.get(new CloudSolrServerClient(leaderClient)).get(ZkStateReader.NODE_NAME_PROP))) {
+    while (((SolrDispatchFilter) upJetty.getDispatchFilter().getFilter()).getCores().getZkController().getZkStateReader().getCloudState().liveNodesContain(clientToInfo.get(leaderClient).get(ZkStateReader.NODE_NAME_PROP))) {
       if (tries++ == 120) {
         fail("Shard still reported as live in zk");
       }