You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/12/13 02:31:11 UTC

svn commit: r1213535 - in /lucene/dev/branches/solrcloud/solr/core/src: java/org/apache/solr/update/SolrCmdDistributor.java test/org/apache/solr/cloud/ChaosMonkey.java test/org/apache/solr/cloud/FullDistributedZkTest.java

Author: markrmiller
Date: Tue Dec 13 01:31:10 2011
New Revision: 1213535

URL: http://svn.apache.org/viewvc?rev=1213535&view=rev
Log:
factor chaos monkey out into its own class and start finishing the zk connection expiration test work

Added:
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java   (with props)
Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java?rev=1213535&r1=1213534&r2=1213535&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java Tue Dec 13 01:31:10 2011
@@ -391,8 +391,9 @@ public class SolrCmdDistributor {
             "interrupted waiting for shard update response", e);
       }
     }
-    //System.out.println("expected:" + expectedResponses + " failed:" + failed + " failedAfterConnect:" + failedAfterConnect);
-
+//    if (failed > 0) {
+//      System.out.println("expected:" + expectedResponses + " failed:" + failed + " failedAfterConnect:" + failedAfterConnect);
+//    }
     // TODO: this is a somewhat weak success guarantee - if the request was successful on every replica considered up
     // and that does not return a connect exception, it was successful.
     //should we optionally fail when there is only a single leader for a shard? (no replication)

Added: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1213535&view=auto
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (added)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Tue Dec 13 01:31:10 2011
@@ -0,0 +1,134 @@
+package org.apache.solr.cloud;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.cloud.FullDistributedZkTest.CloudJettyRunner;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.servlet.SolrDispatchFilter;
+
+/**
+ * Test helper that injects failures into a cloud test cluster: it stops the
+ * Jetty instances registered for a slice and can expire the ZooKeeper session
+ * of a single node.
+ */
+public class ChaosMonkey {
+
+  /** Jetty runners keyed by the slice name they were registered under. */
+  private final Map<String,List<CloudJettyRunner>> shardToJetty;
+  /** ZooKeeper test server used to expire client sessions. */
+  private final ZkTestServer zkServer;
+  /** Source of the cloud state consulted before killing a random shard. */
+  private final ZkStateReader zkStateReader;
+  /** Collection whose slices are looked up in the cloud state. */
+  private final String collection;
+  /** Randomness source for picking a victim. */
+  private final Random random;
+
+  /**
+   * @param zkServer      server on which sessions are expired
+   * @param zkStateReader reader used to fetch the current cloud state
+   * @param collection    collection name used when looking up slices
+   * @param shardToJetty  slice name to the Jetty runners serving that slice
+   * @param random        random generator used by {@link #stopRandomShard(String)}
+   */
+  public ChaosMonkey(ZkTestServer zkServer, ZkStateReader zkStateReader,
+      String collection, Map<String,List<CloudJettyRunner>> shardToJetty,
+      Random random) {
+    this.shardToJetty = shardToJetty;
+    this.zkServer = zkServer;
+    this.zkStateReader = zkStateReader;
+    this.collection = collection;
+    this.random = random;
+  }
+
+  /**
+   * Expires the ZooKeeper session currently held by the given Jetty's Solr
+   * dispatch filter.
+   *
+   * @param cloudJetty runner whose ZooKeeper session id is looked up and expired
+   */
+  public void expireSession(CloudJettyRunner cloudJetty) {
+    SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) cloudJetty.jetty.getDispatchFilter().getFilter();
+    long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
+    zkServer.expire(sessionId);
+  }
+
+  /**
+   * Stops the Jetty at position {@code index} of the given slice.
+   *
+   * @param slice key into the slice-to-Jetty map
+   * @param index position within that slice's runner list
+   * @return the Jetty that was stopped
+   */
+  public JettySolrRunner stopShard(String slice, int index) throws Exception {
+    JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
+    stopJetty(jetty);
+    return jetty;
+  }
+
+  /** Destroys the Solr dispatch filter and then stops the Jetty. */
+  private void stopJetty(JettySolrRunner jetty) throws Exception {
+    // get a clean shutdown so that no dirs are left open...
+    ((SolrDispatchFilter)jetty.getDispatchFilter().getFilter()).destroy();
+    jetty.stop();
+  }
+
+  /**
+   * Stops every Jetty registered for the given slice.
+   *
+   * @param slice key into the slice-to-Jetty map
+   */
+  public void stopShard(String slice) throws Exception {
+    List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+    for (CloudJettyRunner jetty : jetties) {
+      stopJetty(jetty.jetty);
+    }
+  }
+
+  /**
+   * Stops every Jetty of the given slice except those whose
+   * {@code shardName} equals the supplied name.
+   *
+   * @param slice     key into the slice-to-Jetty map
+   * @param shardName shard name of the runner(s) to leave untouched
+   */
+  public void stopShardExcept(String slice, String shardName) throws Exception {
+    List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+    for (CloudJettyRunner jetty : jetties) {
+      if (!jetty.shardName.equals(shardName)) {
+        stopJetty(jetty.jetty);
+      }
+    }
+  }
+
+  /**
+   * Returns, without stopping it, the Jetty at position {@code index} of the
+   * given slice.
+   */
+  public JettySolrRunner getShard(String slice, int index) throws Exception {
+    JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
+    return jetty;
+  }
+
+  /**
+   * Placeholder for stopping a random shard across all slices.
+   *
+   * @return always {@code null}; nothing is stopped
+   */
+  public JettySolrRunner stopRandomShard() throws Exception {
+    // TODO: not implemented yet - collect the shards of every slice from the
+    // cloud state and stop one of them at random
+    return null;
+  }
+
+  /**
+   * Stops a randomly chosen Jetty of the given slice, provided at least two
+   * of the slice's Jetties currently count as running.
+   * <p>
+   * A Jetty counts as running when the process is running, its state in the
+   * freshly updated cloud state is not {@code RECOVERING}, and its node is
+   * among the live nodes. The victim is drawn from all Jetties of the slice,
+   * not only from those counted as running.
+   *
+   * @param slice key into the slice-to-Jetty map and slice name in the cloud state
+   * @return the Jetty that was stopped, or {@code null} if fewer than two
+   *         Jetties were running and nothing was stopped
+   */
+  public JettySolrRunner stopRandomShard(String slice) throws Exception {
+    // get latest cloud state
+    zkStateReader.updateCloudState(true);
+    Slice theShards = zkStateReader.getCloudState().getSlices(collection)
+        .get(slice);
+    int numRunning = 0;
+
+    for (CloudJettyRunner cloudJetty : shardToJetty.get(slice)) {
+      boolean running = true;
+
+      ZkNodeProps props = theShards.getShards().get(cloudJetty.shardName);
+      String state = props.get(ZkStateReader.STATE_PROP);
+      String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
+
+      if (!cloudJetty.jetty.isRunning()
+          || state.equals(ZkStateReader.RECOVERING)
+          || !zkStateReader.getCloudState().liveNodesContain(nodeName)) {
+        running = false;
+      }
+
+      if (running) {
+        numRunning++;
+      }
+    }
+
+    if (numRunning < 2) {
+      // we cannot kill anyone
+      return null;
+    }
+
+    // kill a random shard in the requested slice
+    List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+    // nextInt's bound is exclusive, so passing size() covers every index
+    // including the last one
+    int index = random.nextInt(jetties.size());
+    JettySolrRunner jetty = jetties.get(index).jetty;
+    jetty.stop();
+    return jetty;
+  }
+
+}
\ No newline at end of file

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java?rev=1213535&r1=1213534&r2=1213535&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java Tue Dec 13 01:31:10 2011
@@ -59,6 +59,8 @@ import org.junit.BeforeClass;
  */
 public class FullDistributedZkTest extends AbstractDistributedZkTestCase {
 
+  private static final String SHARD2 = "shard2";
+
   private static final String DEFAULT_COLLECTION = "collection1";
 
   String t1="a_t";
@@ -128,99 +130,7 @@ public class FullDistributedZkTest exten
 
   }
   
-  class ChaosMonkey {
-    private Map<String,List<CloudJettyRunner>> shardToJetty;
-
-    public ChaosMonkey(Map<String,List<CloudJettyRunner>> shardToJetty) {
-      this.shardToJetty = shardToJetty;
-    }
-    
-    public void expireSession(CloudJettyRunner cloudJetty) {
-      SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) cloudJetty.jetty.getDispatchFilter().getFilter();
-      long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
-      zkServer.expire(sessionId);
-    }
-    
-    public JettySolrRunner stopShard(String slice, int index) throws Exception {
-      JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
-      stopJetty(jetty);
-      return jetty;
-    }
-
-    private void stopJetty(JettySolrRunner jetty) throws Exception {
-      // get a clean shutdown so that no dirs are left open...
-      ((SolrDispatchFilter)jetty.getDispatchFilter().getFilter()).destroy();
-      jetty.stop();
-    }
-    
-    public void stopShard(String slice) throws Exception {
-      List<CloudJettyRunner> jetties = shardToJetty.get(slice);
-      for (CloudJettyRunner jetty : jetties) {
-        stopJetty(jetty.jetty);
-      }
-    }
-    
-    public void stopShardExcept(String slice, String shardName) throws Exception {
-      List<CloudJettyRunner> jetties = shardToJetty.get(slice);
-      for (CloudJettyRunner jetty : jetties) {
-        if (!jetty.shardName.equals(shardName)) {
-          stopJetty(jetty.jetty);
-        }
-      }
-    }
-    
-    public JettySolrRunner getShard(String slice, int index) throws Exception {
-      JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
-      return jetty;
-    }
-    
-    public JettySolrRunner stopRandomShard() throws Exception {
-      // add all the shards to a list
-//      CloudState clusterState = zk.getCloudState();
-//      for (String collection : collections)   {
-//      Slice theShards = zk.getCloudState().getSlices(collection);
-      return null;
-    }
-    
-    public JettySolrRunner stopRandomShard(String slice) throws Exception {
-      // get latest cloud state
-      zkStateReader.updateCloudState(true);
-      Slice theShards = zkStateReader.getCloudState().getSlices(DEFAULT_COLLECTION)
-          .get(slice);
-      int numRunning = 0;
-      
-      for (CloudJettyRunner cloudJetty : shardToJetty.get(slice)) {
-        boolean running = true;
-        
-        ZkNodeProps props = theShards.getShards().get(cloudJetty.shardName);
-        String state = props.get(ZkStateReader.STATE_PROP);
-        String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
-        
-        if (!cloudJetty.jetty.isRunning()
-            || state.equals(ZkStateReader.RECOVERING)
-            || !zkStateReader.getCloudState().liveNodesContain(nodeName)) {
-          running = false;
-        }
-        
-        if (running) {
-          numRunning++;
-        }
-      }
-      
-      if (numRunning < 2) {
-        // we cannot kill anyone
-        return null;
-      }
-      
-      // kill random shard in shard2
-      List<CloudJettyRunner> jetties = shardToJetty.get(slice);
-      int index = random.nextInt(jetties.size() - 1);
-      JettySolrRunner jetty = jetties.get(index).jetty;
-      jetty.stop();
-      return jetty;
-    }
 
-  }
   
   @Before
   @Override
@@ -228,6 +138,7 @@ public class FullDistributedZkTest exten
     super.setUp();
     System.setProperty("numShards", Integer.toString(sliceCount));
     
+    chaosMonkey = new ChaosMonkey(zkServer, zkStateReader, DEFAULT_COLLECTION, shardToJetty, random);
   }
   
   @BeforeClass
@@ -254,8 +165,6 @@ public class FullDistributedZkTest exten
     // TODO: for now, turn off stress because it uses regular clients, and we 
     // need the cloud client because we kill servers
     stress = 0;
-    chaosMonkey = new ChaosMonkey(shardToJetty);
-
   }
   
   protected void initCloud() throws Exception {
@@ -433,21 +342,24 @@ public class FullDistributedZkTest exten
   @Override
   protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
     controlClient.add(doc);
- 
-    boolean pick = random.nextBoolean();
-    
-    int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % sliceCount;
-    
-    if (pick && sliceCount > 1) {
-      which = which + ((shardCount / sliceCount) * random.nextInt(sliceCount-1));
-    }
+
+//    if we wanted to randomly pick a client - but sometimes they may be down...
     
-    CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(which);
+//    boolean pick = random.nextBoolean();
+//    
+//    int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % sliceCount;
+//    
+//    if (pick && sliceCount > 1) {
+//      which = which + ((shardCount / sliceCount) * random.nextInt(sliceCount-1));
+//    }
+//    
+//    CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(which);
 
+    
     UpdateRequest ureq = new UpdateRequest();
     ureq.add(doc);
     //ureq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
-    ureq.process(client);
+    ureq.process(cloudClient);
   }
   
   protected void index_specific(int serverNumber, Object... fields) throws Exception {
@@ -649,13 +561,12 @@ public class FullDistributedZkTest exten
     query("q", "*:*", "sort", "n_tl1 desc");
     
     // kill a shard
-    JettySolrRunner deadShard = chaosMonkey.stopShard("shard2", 0);
-    //JettySolrRunner deadShard2 = killShard("shard3", 1);
+    JettySolrRunner deadShard = chaosMonkey.stopShard(SHARD2, 0);
     
     // ensure shard is dead
     try {
       // TODO: ignore fail
-      index_specific(shardToClient.get("shard2").get(0), id, 999, i1, 107, t1,
+      index_specific(shardToClient.get(SHARD2).get(0), id, 999, i1, 107, t1,
         "specific doc!");
       fail("This server should be down and this update should have failed");
     } catch (SolrServerException e) {
@@ -663,15 +574,15 @@ public class FullDistributedZkTest exten
     }
     
     // try to index to a living shard at shard2
-    // TODO: this can fail!
-    index_specific(shardToClient.get("shard2").get(1), id, 1000, i1, 108, t1,
+    // TODO: this can fail with connection refused !????
+    index_specific(shardToClient.get(SHARD2).get(1), id, 1000, i1, 108, t1,
         "specific doc!");
 
     commit();
     
     query("q", "*:*", "sort", "n_tl1 desc");
     
-    // TMP: try adding a doc with CloudSolrServer
+    // try adding a doc with CloudSolrServer
     cloudClient.setDefaultCollection(DEFAULT_COLLECTION);
     SolrQuery query = new SolrQuery("*:*");
     query.add("distrib", "true");
@@ -728,7 +639,7 @@ public class FullDistributedZkTest exten
     
     waitForRecovery(deadShard);
     
-    List<SolrServer> s2c = shardToClient.get("shard2");
+    List<SolrServer> s2c = shardToClient.get(SHARD2);
 
     // if we properly recovered, we should now have the couple missing docs that
     // came in while shard was down
@@ -749,17 +660,11 @@ public class FullDistributedZkTest exten
       if (VERBOSE) System.out.println("total:" + client.query(new SolrQuery("*:*")).getResults().getNumFound());
     }
 
-    // assert the new server has the same number of docs as another server in
-    // that shard
-    // TODO: make a new call that checks each shard in slice has equal docs
-    assertEquals(shardToClient.get("shard1").get(0).query(new SolrQuery("*:*"))
-        .getResults().getNumFound(),
-        shardToClient.get("shard1").get(shardToClient.get("shard1").size() - 1)
-            .query(new SolrQuery("*:*")).getResults().getNumFound());
+    checkShardConsistency(SHARD2);
 
     assertDocCounts();
     
-    String docId = "99999999";
+    long docId = 99999999L;
     indexr("id", docId, t1, "originalcontent");
     
     commit();
@@ -786,7 +691,7 @@ public class FullDistributedZkTest exten
     
     UpdateRequest uReq = new UpdateRequest();
     //uReq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
-    uReq.deleteById(docId).process(clients.get(0));
+    uReq.deleteById(Long.toString(docId)).process(clients.get(0));
     
     commit();
     
@@ -806,19 +711,43 @@ public class FullDistributedZkTest exten
     
     // TODO: bring this to its own method?
     // try indexing to a leader that has no replicas up
-    ZkNodeProps leaderProps = zkStateReader.getLeaderProps(DEFAULT_COLLECTION, "shard2");
+    ZkNodeProps leaderProps = zkStateReader.getLeaderProps(DEFAULT_COLLECTION, SHARD2);
     
     String nodeName = leaderProps.get(ZkStateReader.NODE_NAME_PROP);
-    chaosMonkey.stopShardExcept("shard2", nodeName);
+    chaosMonkey.stopShardExcept(SHARD2, nodeName);
     
     SolrServer client = getClient(nodeName);
     
     System.out.println("what happens here?");
     index_specific(client, "id", docId + 1, t1, "what happens here?");
+    
     // expire a session...
-    //CloudJettyRunner cloudJetty = shardToJetty.get("shard1").get(0);
-    //chaosMonkey.expireSession(cloudJetty);
+    CloudJettyRunner cloudJetty = shardToJetty.get("shard1").get(0);
+    chaosMonkey.expireSession(cloudJetty);
+    
+    indexr("id", docId + 1, t1, "slip this doc in");
+    
+    waitForRecovery(cloudJetty.jetty);
     
+    checkShardConsistency("shard1");
+    
+  }
+
+  /**
+   * Fails the test unless every client registered for the given shard reports
+   * the same number of documents for a match-all query.
+   *
+   * @param shard key into {@code shardToClient} selecting the clients to compare
+   * @throws SolrServerException if one of the queries fails
+   */
+  private void checkShardConsistency(String shard) throws SolrServerException {
+    List<SolrServer> solrClients = shardToClient.get(shard);
+    long lastNum = -1;
+    for (SolrServer client : solrClients) {
+      long num = client.query(new SolrQuery("*:*")).getResults().getNumFound();
+      if (lastNum > -1 && lastNum != num) {
+        fail("shard is not consistent, expected:" + lastNum + " and got:" + num);
+      }
+      lastNum = num;
+    }
+    // Each client is compared with its predecessor, which implies that the
+    // first and last clients of the requested shard agree as well; no extra
+    // first-vs-last assertion is needed.
+  }
 
   private SolrServer getClient(String nodeName) {