You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/01/05 22:43:40 UTC
svn commit: r1227853 - in /lucene/dev/branches/solrcloud/solr:
core/src/java/org/apache/solr/client/solrj/embedded/
core/src/test/org/apache/solr/cloud/
solrj/src/java/org/apache/solr/common/cloud/
Author: markrmiller
Date: Thu Jan 5 21:43:39 2012
New Revision: 1227853
URL: http://svn.apache.org/viewvc?rev=1227853&view=rev
Log:
start doing random connectionloss and expiration during chaos monkey test
Modified:
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java?rev=1227853&r1=1227852&r2=1227853&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java Thu Jan 5 21:43:39 2012
@@ -97,7 +97,7 @@ public class JettySolrRunner {
connector.setReuseAddress(true);
QueuedThreadPool threadPool = (QueuedThreadPool) connector.getThreadPool();
if (threadPool != null) {
- threadPool.setMaxStopTimeMs(1000);
+ threadPool.setMaxStopTimeMs(200);
}
server.setConnectors(new Connector[] { connector });
server.setSessionIdManager(new HashSessionIdManager(new Random()));
Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1227853&r1=1227852&r2=1227853&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Thu Jan 5 21:43:39 2012
@@ -17,7 +17,6 @@ package org.apache.solr.cloud;
* limitations under the License.
*/
-import java.io.IOException;
import java.net.BindException;
import java.util.ArrayList;
import java.util.List;
@@ -35,7 +34,6 @@ import org.apache.solr.common.cloud.ZkSt
import org.apache.solr.core.CoreContainer;
import org.apache.solr.servlet.SolrDispatchFilter;
import org.apache.zookeeper.KeeperException;
-import org.apache.zookeeper.ZooKeeper;
import org.mortbay.jetty.servlet.FilterHolder;
/**
@@ -48,8 +46,6 @@ import org.mortbay.jetty.servlet.FilterH
public class ChaosMonkey {
private static final boolean DONTKILLLEADER = true;
- protected static final boolean EXPIRE_SESSIONS = false;
- protected static final boolean CAUSE_CONNECTION_LOSS = false;
private Map<String,List<CloudJettyRunner>> shardToJetty;
private ZkTestServer zkServer;
@@ -63,6 +59,8 @@ public class ChaosMonkey {
private AtomicInteger connloss = new AtomicInteger();
private Map<String,List<SolrServer>> shardToClient;
+ private boolean expireSessions;
+ private boolean causeConnectionLoss;
public ChaosMonkey(ZkTestServer zkServer, ZkStateReader zkStateReader,
String collection, Map<String,List<CloudJettyRunner>> shardToJetty,
@@ -73,12 +71,20 @@ public class ChaosMonkey {
this.zkStateReader = zkStateReader;
this.collection = collection;
this.random = random;
+
+ expireSessions = random.nextBoolean();
+ causeConnectionLoss = random.nextBoolean();
}
public void expireSession(JettySolrRunner jetty) {
SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) jetty.getDispatchFilter().getFilter();
- long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
- zkServer.expire(sessionId);
+ if (solrDispatchFilter != null) {
+ CoreContainer cores = solrDispatchFilter.getCores();
+ if (cores != null) {
+ long sessionId = cores.getZkController().getZkClient().getSolrZooKeeper().getSessionId();
+ zkServer.expire(sessionId);
+ }
+ }
}
public void expireRandomSession() throws KeeperException, InterruptedException {
@@ -87,6 +93,7 @@ public class ChaosMonkey {
JettySolrRunner jetty = getRandomJetty(sliceName, DONTKILLLEADER);
if (jetty != null) {
expireSession(jetty);
+ expires.incrementAndGet();
}
}
@@ -112,22 +119,22 @@ public class ChaosMonkey {
// nocommit: two ways to try to force connectionloss...
// must be at least double tick time...
- // zkClient.getSolrZooKeeper().pauseCnxn(ZkTestServer.TICK_TIME * 2);
+ zkClient.getSolrZooKeeper().pauseCnxn(ZkTestServer.TICK_TIME * 2);
// open a new zk with same id and close it - should cause connection loss
- ZooKeeper zoo2;
- try {
- zoo2 = new ZooKeeper(zkController.getZkServerAddress(), zkClient.getSolrZooKeeper().getSessionTimeout(),
- null,
- zkClient.getSolrZooKeeper().getSessionId(), null);
- zoo2.close();
- } catch (IOException e1) {
- // TODO Auto-generated catch block
- e1.printStackTrace();
- } catch (InterruptedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
+// ZooKeeper zoo2;
+// try {
+// zoo2 = new ZooKeeper(zkController.getZkServerAddress(), zkClient.getSolrZooKeeper().getSessionTimeout(),
+// null,
+// zkClient.getSolrZooKeeper().getSessionId(), null);
+// zoo2.close();
+// } catch (IOException e1) {
+// // TODO Auto-generated catch block
+// e1.printStackTrace();
+// } catch (InterruptedException e) {
+// // TODO Auto-generated catch block
+// e.printStackTrace();
+// }
}
}
@@ -320,17 +327,12 @@ public class ChaosMonkey {
public SolrServer getRandomClient(String slice) throws KeeperException, InterruptedException {
// get latest cloud state
zkStateReader.updateCloudState(true);
- Slice theShards = zkStateReader.getCloudState().getSlices(collection)
- .get(slice);
-
+
// get random shard
List<SolrServer> clients = shardToClient.get(slice);
int index = random.nextInt(clients.size() - 1);
SolrServer client = clients.get(index);
-
- ZkNodeProps leader = zkStateReader.getLeaderProps(collection, slice);
-
return client;
}
@@ -364,7 +366,12 @@ public class ChaosMonkey {
} catch (BindException e2) {
jetty.stop();
sleep(5000);
- jetty.start();
+ try {
+ jetty.start();
+ } catch (BindException e3) {
+ // we coud not get the port
+ continue;
+ }
}
}
//System.out.println("started on port:" + jetty.getLocalPort());
@@ -375,12 +382,11 @@ public class ChaosMonkey {
int rnd = random.nextInt(10);
// nocommit: we dont randomly expire yet
- if (EXPIRE_SESSIONS && rnd < 4) {
+ if (expireSessions && rnd < 8) {
expireRandomSession();
- expires.incrementAndGet();
}
- if (CAUSE_CONNECTION_LOSS && rnd < 10) {
+ if (causeConnectionLoss && rnd < 10) {
randomConnectionLoss();
randomConnectionLoss();
}
Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java?rev=1227853&r1=1227852&r2=1227853&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java Thu Jan 5 21:43:39 2012
@@ -838,10 +838,16 @@ public class FullSolrCloudTest extends A
continue;
}
+ boolean live = false;
+ String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
+ if (zkStateReader.getCloudState().liveNodesContain(nodeName)) {
+ live = true;
+ }
+ if (verbose) System.out.println(" live:" + live);
boolean recovering = props.get(ZkStateReader.STATE_PROP).equals(ZkStateReader.RECOVERING);
if (verbose) System.out.println(" num:" + num + "\n" + (recovering ? "recovering" : ""));
- if (!recovering) {
+ if (!recovering && live) {
if (lastNum > -1 && lastNum != num && failMessage == null) {
failMessage = "shard is not consistent, expected:" + lastNum
+ " and got:" + num;
Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java?rev=1227853&r1=1227852&r2=1227853&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java Thu Jan 5 21:43:39 2012
@@ -75,7 +75,7 @@ class ConnectionManager implements Watch
clientConnected.countDown();
} else if (state == KeeperState.Expired) {
connected = false;
- log.info("Attempting to reconnect to ZooKeeper...");
+ log.info("Attempting to reconnect to recover relationship with ZooKeeper...");
try {
connectionStrategy.reconnect(zkServerAddress, zkClientTimeout, this, new ZkClientConnectionStrategy.ZkUpdate() {