You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/11/25 19:36:09 UTC

svn commit: r1206278 - in /lucene/dev/branches/solrcloud/solr: core/src/java/org/apache/solr/client/solrj/embedded/ core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/servlet/ core/src/test/org/apache/solr/cloud/ solrj/src/java/org/apac...

Author: markrmiller
Date: Fri Nov 25 18:36:07 2011
New Revision: 1206278

URL: http://svn.apache.org/viewvc?rev=1206278&view=rev
Log:
some work on test infra for the feature and other minor forward progression

Added:
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonekyDistributedZkTest.java   (with props)
Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
    lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java Fri Nov 25 18:36:07 2011
@@ -127,6 +127,14 @@ public class JettySolrRunner {
 
   }
 
+  public FilterHolder getDispatchFilter() {
+    return dispatchFilter;
+  }
+
+  public boolean isRunning() {
+    return server.isRunning();
+  }
+
   // ------------------------------------------------------------------------------------------------
   // ------------------------------------------------------------------------------------------------
 

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java Fri Nov 25 18:36:07 2011
@@ -157,6 +157,8 @@ public final class ZkController {
                   .getCurrentDescriptors();
               if (descriptors != null) {
                 for (CoreDescriptor descriptor : descriptors) {
+                  // nocommit: non reloaded cores will try and
+                  // recover - reloaded cores will not
                   register(descriptor.getName(), descriptor);
                 }
               }
@@ -467,7 +469,7 @@ public final class ZkController {
           core.close();
         }
       } else {
-        log.warn("Cannot recover without access to CoreConatiner");
+        log.warn("Cannot recover without access to CoreContainer");
         return shardId;
       }
 

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java Fri Nov 25 18:36:07 2011
@@ -63,6 +63,7 @@ public class SolrDispatchFilter implemen
   final Logger log = LoggerFactory.getLogger(SolrDispatchFilter.class);
 
   protected CoreContainer cores;
+
   protected String pathPrefix = null; // strip this from the beginning of a path
   protected String abortErrorMessage = null;
   protected String solrConfigFilename = null;
@@ -124,6 +125,10 @@ public class SolrDispatchFilter implemen
 
     log.info("SolrDispatchFilter.init() done");
   }
+  
+  public CoreContainer getCores() {
+    return cores;
+  }
 
   /** Method to override to change how CoreContainer initialization is performed. */
   protected CoreContainer.Initializer createInitializer() {

Added: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonekyDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonekyDistributedZkTest.java?rev=1206278&view=auto
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonekyDistributedZkTest.java (added)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonekyDistributedZkTest.java Fri Nov 25 18:36:07 2011
@@ -0,0 +1,61 @@
+package org.apache.solr.cloud;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+/**
+ * Runs the inherited initCloud() setup, indexes one document and issues a single sorted match-all query.
+ */
+public class ChaosMonekyDistributedZkTest extends FullDistributedZkTest {
+  
+  
+  @BeforeClass
+  public static void beforeSuperClass() throws Exception {
+    // no class-level setup is performed here
+  }
+  
+  public ChaosMonekyDistributedZkTest() {
+    super();
+  }
+  
+  @Override
+  public void doTest() throws Exception {
+    initCloud();
+    
+    handle.clear();
+    handle.put("QTime", SKIPVAL);
+    handle.put("timestamp", SKIPVAL);
+    
+    del("*:*");
+    
+    indexr(id, 1, i1, 100, tlong, 100, t1, "now is the time for all good men",
+        "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
+    
+    commit();
+    
+    // these queries should be exactly ordered and scores should exactly match
+    query("q", "*:*", "sort", i1 + " desc");
+  }
+  
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+}

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java Fri Nov 25 18:36:07 2011
@@ -42,7 +42,9 @@ import org.apache.solr.common.cloud.ZkNo
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.servlet.SolrDispatchFilter;
 import org.apache.zookeeper.KeeperException;
+import org.junit.Before;
 import org.junit.BeforeClass;
 
 /**
@@ -74,8 +76,94 @@ public class FullDistributedZkTest exten
   
   protected Map<SolrServer,ZkNodeProps> clientToInfo = new HashMap<SolrServer,ZkNodeProps>();
   protected Map<String,List<SolrServer>> shardToClient = new HashMap<String,List<SolrServer>>();
-  protected Map<String,List<JettySolrRunner>> shardToJetty = new HashMap<String,List<JettySolrRunner>>();
+  protected Map<String,List<CloudJettyRunner>> shardToJetty = new HashMap<String,List<CloudJettyRunner>>();
   private AtomicInteger i = new AtomicInteger(0);
+  private ChaosMonkey chaosMonkey;
+  private volatile ZkStateReader zk;
+  
+  class CloudJettyRunner {
+    JettySolrRunner jetty;
+    String shardName;
+  }
+  
+  class ChaosMonkey {
+    private Map<String,List<CloudJettyRunner>> shardToJetty;
+
+    public ChaosMonkey(Map<String,List<CloudJettyRunner>> shardToJetty) {
+      this.shardToJetty = shardToJetty;
+    }
+    
+    public void expireSession(CloudJettyRunner cloudJetty) {
+      SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) cloudJetty.jetty.getDispatchFilter().getFilter();
+      long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
+      zkServer.expire(sessionId);
+    }
+    
+    private JettySolrRunner killShard(String slice, int index) throws Exception {
+      // debug output: dump the client mapping and the jetties registered for this slice
+      System.out.println(" KILL:" + shardToClient);
+      System.out.println(shardToJetty.get(slice));
+      
+      // stop the jetty at position 'index' within 'slice' and hand it back to the caller
+      JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
+      jetty.stop();
+      return jetty;
+    }
+    
+    private JettySolrRunner killRandomShard() throws Exception {
+      // TODO: not implemented - currently always returns null; sketch of the idea kept below
+//      CloudState clusterState = zk.getCloudState();
+//      for (String collection : collections)   {
+//      Slice theShards = zk.getCloudState().getSlices(collection);
+      return null;
+    }
+    
+    /**
+     * Stops one randomly chosen jetty of the given slice, but only while at
+     * least two of the slice's jetties look healthy (jetty running, state not
+     * RECOVERING, node listed in live nodes).
+     *
+     * @param slice name of the slice in DEFAULT_COLLECTION to pick a victim from
+     * @return the stopped jetty, or null when fewer than two were healthy
+     */
+    private JettySolrRunner killRandomShard(String slice) throws Exception {
+      // get latest cloud state
+      zk.updateCloudState(true);
+      Slice theShards = zk.getCloudState().getSlices(DEFAULT_COLLECTION).get(slice);
+      List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+      int numRunning = 0;
+      for (CloudJettyRunner cloudJetty : jetties) {
+        ZkNodeProps props = theShards.getShards().get(cloudJetty.shardName);
+        String state = props.get(ZkStateReader.STATE_PROP);
+        String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
+        // healthy = jetty up, not recovering, and still a live node in zk
+        if (cloudJetty.jetty.isRunning()
+            && !state.equals(ZkStateReader.RECOVERING)
+            && zk.getCloudState().liveNodesContain(nodeName)) {
+          numRunning++;
+        }
+      }
+      if (numRunning < 2) {
+        // we cannot kill anyone
+        return null;
+      }
+      // nextInt's bound is exclusive, so use size() (not size() - 1) to make
+      // every jetty in the slice, including the last one, a candidate
+      // NOTE(review): the pick is over all jetties, not only the healthy ones
+      int index = random.nextInt(jetties.size());
+      JettySolrRunner jetty = jetties.get(index).jetty;
+      jetty.stop();
+      return jetty;
+    }
+  }
+  
+  @Before
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    zk = new ZkStateReader(zkServer.getZkAddress(), 10000,
+        AbstractZkTestCase.TIMEOUT);
+  }
   
   @BeforeClass
   public static void beforeClass() throws Exception {
@@ -91,9 +179,11 @@ public class FullDistributedZkTest exten
     // TODO: for now, turn off stress because it uses regular clients, and we 
     // need the cloud client because we kill servers
     stress = 0;
+    chaosMonkey = new ChaosMonkey(shardToJetty);
   }
   
-  private void initCloudClient() {
+  protected void initCloud() throws Exception {
+    
     // use the distributed solrj client
     if (cloudClient == null) {
       synchronized(this) {
@@ -132,15 +222,31 @@ public class FullDistributedZkTest exten
       clients.add(client);
     }
     
+    updateMappingsFromZk(jettys, clients);
+    
+    this.jettys.addAll(jettys);
+    this.clients.addAll(clients);
+    // build the shard string
+    for (int i = 1; i <= numJettys/2; i++) {
+      JettySolrRunner j = this.jettys.get(i);
+      JettySolrRunner j2 = this.jettys.get(i + (numJettys/2 - 1));
+      if (sb.length() > 0) sb.append(',');
+      sb.append("localhost:").append(j.getLocalPort()).append(context);
+      sb.append("|localhost:").append(j2.getLocalPort()).append(context);
+    }
+    shards = sb.toString();
+  }
+
+  private void updateMappingsFromZk(List<JettySolrRunner> jettys,
+      List<SolrServer> clients) throws InterruptedException, TimeoutException,
+      IOException, KeeperException, URISyntaxException {
     for (SolrServer client : clients) {
       // find info for this client in zk
-      ZkStateReader zk = new ZkStateReader(zkServer.getZkAddress(), 10000,
-          AbstractZkTestCase.TIMEOUT);
-      zk.createClusterStateWatchersAndUpdate();
-      
+
+      zk.updateCloudState(true);
       Map<String,Slice> slices = zk.getCloudState().getSlices(
           DEFAULT_COLLECTION);
-      zk.updateCloudState(true);
+
       
       for (Map.Entry<String,Slice> slice : slices.entrySet()) {
         Map<String,ZkNodeProps> theShards = slice.getValue().getShards();
@@ -185,29 +291,20 @@ public class FullDistributedZkTest exten
 //            System.out.println("shard:" + slice.getKey());
 //            System.out.println(shard.getValue());
             
-            List<JettySolrRunner> list = shardToJetty.get(slice.getKey());
+            List<CloudJettyRunner> list = shardToJetty.get(slice.getKey());
             if (list == null) {
-              list = new ArrayList<JettySolrRunner>();
+              list = new ArrayList<CloudJettyRunner>();
               shardToJetty.put(slice.getKey(), list);
             }
-            list.add(jetty);
+            CloudJettyRunner cjr = new CloudJettyRunner();
+            cjr.jetty = jetty;
+            cjr.shardName = shardName;
+            list.add(cjr);
           }
         }
       }
       
     }
-    
-    this.jettys.addAll(jettys);
-    this.clients.addAll(clients);
-    // build the shard string
-    for (int i = 1; i <= numJettys/2; i++) {
-      JettySolrRunner j = this.jettys.get(i);
-      JettySolrRunner j2 = this.jettys.get(i + (numJettys/2 - 1));
-      if (sb.length() > 0) sb.append(',');
-      sb.append("localhost:").append(j.getLocalPort()).append(context);
-      sb.append("|localhost:").append(j2.getLocalPort()).append(context);
-    }
-    shards = sb.toString();
   }
   
   @Override
@@ -295,7 +392,7 @@ public class FullDistributedZkTest exten
    */
   @Override
   public void doTest() throws Exception {
-    initCloudClient();
+    initCloud();
     
     handle.clear();
     handle.put("QTime", SKIPVAL);
@@ -450,7 +547,7 @@ public class FullDistributedZkTest exten
     query("q", "*:*", "sort", "n_tl1 desc");
     
     // kill a shard
-    JettySolrRunner deadShard = killShard("shard2", 0);
+    JettySolrRunner deadShard = chaosMonkey.killShard("shard2", 0);
     //JettySolrRunner deadShard2 = killShard("shard3", 1);
     
     // ensure shard is dead
@@ -551,7 +648,7 @@ public class FullDistributedZkTest exten
     for (SolrServer client : shardToClient.get("shard1")) {
       System.out.println("total:" + client.query(new SolrQuery("*:*")).getResults().getNumFound());
     }
-    Thread.sleep(5000);
+
     // assert the new server has the same number of docs as another server in
     // that shard
     // TODO: make a new call that checks each shard in slice has equal docs
@@ -562,19 +659,14 @@ public class FullDistributedZkTest exten
 
     assertDocCounts();
     
-    // Thread.sleep(10000000000L);
-
-  }
-
-  private JettySolrRunner killShard(String shard, int index) throws Exception {
-    // kill
-    System.out.println(" KILL:" + shardToClient);
-    System.out.println(shardToJetty.get(shard));
+    // expire a session...
+    CloudJettyRunner cloudJetty = shardToJetty.get("shard1").get(0);
+    chaosMonkey.expireSession(cloudJetty);
     
-    // kill first shard in shard2
-    JettySolrRunner jetty = shardToJetty.get(shard).get(index);
-    jetty.stop();
-    return jetty;
+    // should cause another recovery...
+    
+    //Thread.sleep(10000000000L);
+
   }
 
   private void assertDocCounts() throws Exception {

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java Fri Nov 25 18:36:07 2011
@@ -174,6 +174,10 @@ public class ZkTestServer {
       @Override
       public int getTimeout() {
         return 4000;
+      }
+      @Override
+      public boolean isClosing() {
+        return false;
       }});
   }
 

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java Fri Nov 25 18:36:07 2011
@@ -74,7 +74,6 @@ class ConnectionManager implements Watch
       connected = true;
       clientConnected.countDown();
     } else if (state == KeeperState.Expired) {
-      
       connected = false;
       log.info("Attempting to reconnect to ZooKeeper...");
 
@@ -93,11 +92,8 @@ class ConnectionManager implements Watch
       } catch (Exception e) {
         log.error("", e);
       }
-
       log.info("Connected:" + connected);
     } else if (state == KeeperState.Disconnected) {
-      // ZooKeeper client will recover when it can
-      // TODO: this needs to be investigated more
       connected = false;
     } else {
       connected = false;

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Fri Nov 25 18:36:07 2011
@@ -326,6 +326,8 @@ public class ZkStateReader {
 	}
   
   // nocommit TODO: do this with cloud state or something along those lines
+  // and if we find out we cannot talk to zk anymore, we should probably realize we are not
+  // a leader anymore - we shouldn't accept updates at all??
   public String getLeader(String collection, String shard) throws Exception {
     
     String url = null;

Modified: lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java?rev=1206278&r1=1206277&r2=1206278&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java (original)
+++ lucene/dev/branches/solrcloud/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java Fri Nov 25 18:36:07 2011
@@ -40,7 +40,6 @@ import org.apache.solr.client.solrj.resp
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.schema.TrieDateField;