You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/01/03 22:16:12 UTC

svn commit: r1226948 - in /lucene/dev/branches/solrcloud/solr: core/src/java/org/apache/solr/cloud/ core/src/test/org/apache/solr/cloud/ test-framework/src/java/org/apache/solr/

Author: markrmiller
Date: Tue Jan  3 21:16:12 2012
New Revision: 1226948

URL: http://svn.apache.org/viewvc?rev=1226948&view=rev
Log:
some test tweaks and a start towards resolving a nocommit - also add a nocommit

Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
    lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1226948&r1=1226947&r2=1226948&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java Tue Jan  3 21:16:12 2012
@@ -158,10 +158,10 @@ public final class ZkController {
                   .getCurrentDescriptors();
               if (descriptors != null) {
                 for (CoreDescriptor descriptor : descriptors) {
-                  // nocommit: non reloaded cores will try and
-                  // recover - reloaded cores will not - but in the case
-                  // of reconnect like this, *everyone* should re register
-                  register(descriptor.getName(), descriptor);
+                  // nocommit: we need to think carefully about what happens when it was
+                  // a leader that was expired - as well as what to do about leaders/overseers
+                  // with connection loss
+                  register(descriptor.getName(), descriptor, true);
                 }
               }
 
@@ -420,6 +420,20 @@ public final class ZkController {
    * @throws Exception 
    */
   public String register(String coreName, final CoreDescriptor desc) throws Exception {  
+    return register(coreName, desc, false);
+  }
+  
+
+  /**
+   * Register shard with ZooKeeper.
+   * 
+   * @param coreName
+   * @param desc
+   * @param recoverReloadedCores
+   * @return
+   * @throws Exception
+   */
+  public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores) throws Exception {  
     final String baseUrl = getBaseUrl();
     
     final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
@@ -512,7 +526,7 @@ public final class ZkController {
         if (cc != null) {
           core = cc.getCore(desc.getName());
           
-          if (core.isReloaded()) {
+          if (core.isReloaded() && !recoverReloadedCores) {
             doRecovery = false;
           }
           

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1226948&r1=1226947&r2=1226948&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Tue Jan  3 21:16:12 2012
@@ -51,6 +51,7 @@ public class ChaosMonkey {
   private volatile boolean stop = false;
   private AtomicInteger stops = new AtomicInteger();
   private AtomicInteger starts = new AtomicInteger();
+  private AtomicInteger expires = new AtomicInteger();
   
   public ChaosMonkey(ZkTestServer zkServer, ZkStateReader zkStateReader,
       String collection, Map<String,List<CloudJettyRunner>> shardToJetty,
@@ -62,12 +63,24 @@ public class ChaosMonkey {
     this.random = random;
   }
   
-  public void expireSession(CloudJettyRunner cloudJetty) {
-    SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) cloudJetty.jetty.getDispatchFilter().getFilter();
+  public void expireSession(JettySolrRunner jetty) {
+    SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) jetty.getDispatchFilter().getFilter();
     long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
     zkServer.expire(sessionId);
   }
   
+  public void expireRandomSession() throws KeeperException, InterruptedException {
+    Map<String,Slice> slices = zkStateReader.getCloudState().getSlices(collection);
+    List<String> sliceKeyList = new ArrayList<String>(slices.size());
+    sliceKeyList.addAll(slices.keySet());
+    String sliceName = sliceKeyList.get(random.nextInt(sliceKeyList.size()));
+    
+    JettySolrRunner jetty = getRandomSacraficialShard(sliceName, DONTKILLLEADER);
+    if (jetty != null) {
+      expireSession(jetty);
+    }
+  }
+  
   public JettySolrRunner stopShard(String slice, int index) throws Exception {
     JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
     stopJetty(jetty);
@@ -100,8 +113,10 @@ public class ChaosMonkey {
         sdf.destroy();
       }
     }
-
-    jetty.stop();
+   
+    if (!jetty.isStopped()) {
+      jetty.stop();
+    }
     
     if (!jetty.isStopped()) {
       throw new RuntimeException("could not stop jetty");
@@ -109,7 +124,9 @@ public class ChaosMonkey {
   }
   
   public static void kill(JettySolrRunner jetty) throws Exception {
-    jetty.stop();
+    if (!jetty.isStopped()) {
+      jetty.stop();
+    }
     
     FilterHolder fh = jetty.getDispatchFilter();
     if (fh != null) {
@@ -157,7 +174,7 @@ public class ChaosMonkey {
   }
   
   public JettySolrRunner stopRandomShard(String slice) throws Exception {
-    JettySolrRunner jetty = getRandomSacraficialShard(slice);
+    JettySolrRunner jetty = getRandomSacraficialShard(slice, DONTKILLLEADER);
     if (jetty != null) {
       stopJetty(jetty);
     }
@@ -177,14 +194,14 @@ public class ChaosMonkey {
   }
   
   public JettySolrRunner killRandomShard(String slice) throws Exception {
-    JettySolrRunner jetty = getRandomSacraficialShard(slice);
+    JettySolrRunner jetty = getRandomSacraficialShard(slice, DONTKILLLEADER);
     if (jetty != null) {
       killJetty(jetty);
     }
     return jetty;
   }
   
-  public JettySolrRunner getRandomSacraficialShard(String slice) throws KeeperException, InterruptedException {
+  public JettySolrRunner getRandomSacraficialShard(String slice, boolean dontkillleader) throws KeeperException, InterruptedException {
     // get latest cloud state
     zkStateReader.updateCloudState(true);
     Slice theShards = zkStateReader.getCloudState().getSlices(collection)
@@ -237,12 +254,10 @@ public class ChaosMonkey {
     List<CloudJettyRunner> jetties = shardToJetty.get(slice);
     int index = random.nextInt(jetties.size() - 1);
     JettySolrRunner jetty = jetties.get(index).jetty;
-    System.out.println("sac shard "+ jetty.getLocalPort());
-    
     
     ZkNodeProps leader = zkStateReader.getLeaderProps(collection, slice);
     
-    if (DONTKILLLEADER && leader.get(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName)) {
+    if (dontkillleader && leader.get(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName)) {
       // we don't kill leaders...
       System.out.println("dont kill the leader");
       return null;
@@ -290,16 +305,23 @@ public class ChaosMonkey {
              }
             }
             
-            JettySolrRunner jetty;
-            if (random.nextBoolean()) {
-              jetty = stopRandomShard();
-            } else {
-              jetty = killRandomShard();
-            }
-            if (jetty == null) {
-              System.out.println("we cannot kill");
+            int rnd = random.nextInt(10);
+            // nocommit: we dont randomly expire yet
+            if (false && rnd < 2) {
+              expireRandomSession();
+              expires.incrementAndGet();
             } else {
-              deadPool.add(jetty);
+              JettySolrRunner jetty;
+              if (random.nextBoolean()) {
+                jetty = stopRandomShard();
+              } else {
+                jetty = killRandomShard();
+              }
+              if (jetty == null) {
+                System.out.println("we cannot kill");
+              } else {
+                deadPool.add(jetty);
+              }
             }
           } catch (InterruptedException e) {
             //
@@ -309,7 +331,8 @@ public class ChaosMonkey {
           }
         }
         
-        System.out.println("I stopped " + stops + " and I started " + starts);
+        System.out.println("I stopped " + stops + " and I started " + starts
+            + ". I also expired " + expires.get());
       }
     }.start();
   }

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java?rev=1226948&r1=1226947&r2=1226948&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java Tue Jan  3 21:16:12 2012
@@ -83,9 +83,9 @@ public class ChaosMonkeySolrCloudTest ex
     
     
     // does not always pass yet
-    checkShardConsistency();
+    checkShardConsistency(true);
     
-    System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
+    if (VERBOSE) System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
   }
   
   @Override

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java?rev=1226948&r1=1226947&r2=1226948&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java Tue Jan  3 21:16:12 2012
@@ -480,7 +480,7 @@ public class FullSolrCloudTest extends A
     
     // expire a session...
     CloudJettyRunner cloudJetty = shardToJetty.get("shard1").get(0);
-    chaosMonkey.expireSession(cloudJetty);
+    chaosMonkey.expireSession(cloudJetty.jetty);
     
     indexr("id", docId + 1, t1, "slip this doc in");
     
@@ -814,6 +814,10 @@ public class FullSolrCloudTest extends A
   }
 
   protected void checkShardConsistency(String shard) throws Exception {
+    checkShardConsistency(shard, false);
+  }
+  
+  protected void checkShardConsistency(String shard, boolean verbose) throws Exception {
     
     List<SolrServer> solrClients = shardToClient.get(shard);
     if (solrClients == null) {
@@ -822,21 +826,22 @@ public class FullSolrCloudTest extends A
     long num = -1;
     long lastNum = -1;
     String failMessage = null;
-    if (VERBOSE) System.out.println("check const of " + shard);
+    if (verbose) System.out.println("check const of " + shard);
     int cnt = 0;
     for (SolrServer client : solrClients) {
+      ZkNodeProps props = clientToInfo.get(new CloudSolrServerClient(client));
+      if (verbose) System.out.println("client" + cnt++);
+      if (verbose) System.out.println("PROPS:" + props);
+      
       try {
         num = client.query(new SolrQuery("*:*")).getResults().getNumFound();
       } catch (SolrServerException e) {
-        if (VERBOSE) System.err.println("error contacting client:" + e.getMessage());
+        if (verbose) System.out.println("error contacting client: " + e.getMessage() + "\n");
         continue;
       }
-      ZkNodeProps props = clientToInfo.get(new CloudSolrServerClient(client));
-      if (VERBOSE) System.out.println("client" + cnt++);
-      if (VERBOSE) System.out.println("PROPS:" + props);
-      
+   
       boolean recovering = props.get(ZkStateReader.STATE_PROP).equals(ZkStateReader.RECOVERING);
-      if (VERBOSE) System.out.println(" num:" + num + "\n" + (recovering ? "recovering" : ""));
+      if (verbose) System.out.println(" num:" + num + "\n" + (recovering ? "recovering" : ""));
       
       if (!recovering) {
         if (lastNum > -1 && lastNum != num && failMessage == null) {
@@ -854,14 +859,18 @@ public class FullSolrCloudTest extends A
   }
   
   protected void checkShardConsistency() throws Exception {
+    checkShardConsistency(false);
+  }
+  
+  protected void checkShardConsistency(boolean verbose) throws Exception {
     long docs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
-    if (VERBOSE) System.out.println("Control Docs:" + docs);
+    if (verbose) System.out.println("Control Docs:" + docs);
     
     updateMappingsFromZk(jettys, clients);
     
     Set<String> theShards = shardToClient.keySet();
     for (String shard : theShards) {
-      checkShardConsistency(shard);
+      checkShardConsistency(shard, verbose);
     }
     
     // now check that the right # are on each shard
@@ -873,7 +882,6 @@ public class FullSolrCloudTest extends A
         try {
           SolrServer client = shardToClient.get(s).get(i);
           ZkNodeProps props = clientToInfo.get(new CloudSolrServerClient(client));
-          if (VERBOSE) System.out.println("PROPS:" + props);
           boolean recovering = props.get(ZkStateReader.STATE_PROP).equals(ZkStateReader.RECOVERING);
           if (!recovering) {
             cnt += client.query(new SolrQuery("*:*")).getResults()
@@ -1078,7 +1086,13 @@ public class FullSolrCloudTest extends A
   
   protected void destroyServers() throws Exception {
     ChaosMonkey.stop(controlJetty);
-    for (JettySolrRunner jetty : jettys) ChaosMonkey.stop(jetty);
+    for (JettySolrRunner jetty : jettys) {
+      try {
+        ChaosMonkey.stop(jetty);
+      } catch (Exception e) {
+        log.error("", e);
+      }
+    }
     clients.clear();
     jettys.clear();
     Thread.sleep(10000);

Modified: lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java?rev=1226948&r1=1226947&r2=1226948&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java (original)
+++ lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java Tue Jan  3 21:16:12 2012
@@ -229,7 +229,7 @@ public abstract class BaseDistributedSea
     for (JettySolrRunner jetty : jettys) jetty.stop();
     clients.clear();
     jettys.clear();
-    Thread.sleep(10000);
+    Thread.sleep(5000);
   }
   
   public JettySolrRunner createJetty(File baseDir, String dataDir) throws Exception {