You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/07/24 07:06:01 UTC
svn commit: r1364890 - in /lucene/dev/branches/branch_4x: ./ solr/
solr/core/ solr/core/src/test/org/apache/solr/cloud/
Author: markrmiller
Date: Tue Jul 24 05:06:00 2012
New Revision: 1364890
URL: http://svn.apache.org/viewvc?rev=1364890&view=rev
Log:
improve zk tests vs blackhole
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Tue Jul 24 05:06:00 2012
@@ -66,18 +66,15 @@ public class ChaosMonkey {
private boolean expireSessions;
private boolean causeConnectionLoss;
private boolean aggressivelyKillLeaders;
- private Map<String,SolrServer> shardToLeaderClient;
private Map<String,CloudJettyRunner> shardToLeaderJetty;
private long startTime;
public ChaosMonkey(ZkTestServer zkServer, ZkStateReader zkStateReader,
String collection, Map<String,List<CloudJettyRunner>> shardToJetty,
Map<String,List<SolrServer>> shardToClient,
- Map<String,SolrServer> shardToLeaderClient,
Map<String,CloudJettyRunner> shardToLeaderJetty) {
this.shardToJetty = shardToJetty;
this.shardToClient = shardToClient;
- this.shardToLeaderClient = shardToLeaderClient;
this.shardToLeaderJetty = shardToLeaderJetty;
this.zkServer = zkServer;
this.zkStateReader = zkStateReader;
@@ -104,7 +101,7 @@ public class ChaosMonkey {
public void expireRandomSession() throws KeeperException, InterruptedException {
String sliceName = getRandomSlice();
- JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders);
+ JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders).jetty;
if (jetty != null) {
expireSession(jetty);
expires.incrementAndGet();
@@ -115,7 +112,7 @@ public class ChaosMonkey {
monkeyLog("cause connection loss!");
String sliceName = getRandomSlice();
- JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders);
+ JettySolrRunner jetty = getRandomJetty(sliceName, aggressivelyKillLeaders).jetty;
if (jetty != null) {
causeConnectionLoss(jetty);
connloss.incrementAndGet();
@@ -135,23 +132,29 @@ public class ChaosMonkey {
}
}
- public JettySolrRunner stopShard(String slice, int index) throws Exception {
- JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
- stopJetty(jetty);
- return jetty;
+ public CloudJettyRunner stopShard(String slice, int index) throws Exception {
+ CloudJettyRunner cjetty = shardToJetty.get(slice).get(index);
+ stopJetty(cjetty);
+ return cjetty;
}
- public void stopJetty(JettySolrRunner jetty) throws Exception {
- stop(jetty);
+ public void stopJetty(CloudJettyRunner cjetty) throws Exception {
+ stop(cjetty.jetty);
stops.incrementAndGet();
}
- public void killJetty(JettySolrRunner jetty) throws Exception {
- kill(jetty);
+ public void killJetty(CloudJettyRunner cjetty) throws Exception {
+ kill(cjetty);
stops.incrementAndGet();
}
- public static void stop(JettySolrRunner jetty) throws Exception {
+ public void stopJetty(JettySolrRunner jetty) throws Exception {
+ stops.incrementAndGet();
+ stopJettySolrRunner(jetty);
+ }
+
+ private static void stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
+
monkeyLog("stop shard! " + jetty.getLocalPort());
// get a clean shutdown so that no dirs are left open...
FilterHolder fh = jetty.getDispatchFilter();
@@ -168,7 +171,8 @@ public class ChaosMonkey {
}
}
- public static void kill(JettySolrRunner jetty) throws Exception {
+ public static void kill(CloudJettyRunner cjetty) throws Exception {
+ JettySolrRunner jetty = cjetty.jetty;
monkeyLog("kill shard! " + jetty.getLocalPort());
FilterHolder fh = jetty.getDispatchFilter();
SolrDispatchFilter sdf = null;
@@ -189,7 +193,7 @@ public class ChaosMonkey {
public void stopShard(String slice) throws Exception {
List<CloudJettyRunner> jetties = shardToJetty.get(slice);
for (CloudJettyRunner jetty : jetties) {
- stopJetty(jetty.jetty);
+ stopJetty(jetty);
}
}
@@ -197,7 +201,7 @@ public class ChaosMonkey {
List<CloudJettyRunner> jetties = shardToJetty.get(slice);
for (CloudJettyRunner jetty : jetties) {
if (!jetty.nodeName.equals(shardName)) {
- stopJetty(jetty.jetty);
+ stopJetty(jetty);
}
}
}
@@ -207,22 +211,22 @@ public class ChaosMonkey {
return jetty;
}
- public JettySolrRunner stopRandomShard() throws Exception {
+ public CloudJettyRunner stopRandomShard() throws Exception {
String sliceName = getRandomSlice();
return stopRandomShard(sliceName);
}
- public JettySolrRunner stopRandomShard(String slice) throws Exception {
- JettySolrRunner jetty = getRandomJetty(slice, aggressivelyKillLeaders);
- if (jetty != null) {
- stopJetty(jetty);
+ public CloudJettyRunner stopRandomShard(String slice) throws Exception {
+ CloudJettyRunner cjetty = getRandomJetty(slice, aggressivelyKillLeaders);
+ if (cjetty != null) {
+ stopJetty(cjetty);
}
- return jetty;
+ return cjetty;
}
- public JettySolrRunner killRandomShard() throws Exception {
+ public CloudJettyRunner killRandomShard() throws Exception {
// add all the shards to a list
String sliceName = getRandomSlice();
@@ -238,15 +242,15 @@ public class ChaosMonkey {
return sliceName;
}
- public JettySolrRunner killRandomShard(String slice) throws Exception {
- JettySolrRunner jetty = getRandomJetty(slice, aggressivelyKillLeaders);
- if (jetty != null) {
- killJetty(jetty);
+ public CloudJettyRunner killRandomShard(String slice) throws Exception {
+ CloudJettyRunner cjetty = getRandomJetty(slice, aggressivelyKillLeaders);
+ if (cjetty != null) {
+ killJetty(cjetty);
}
- return jetty;
+ return cjetty;
}
- public JettySolrRunner getRandomJetty(String slice, boolean aggressivelyKillLeaders) throws KeeperException, InterruptedException {
+ public CloudJettyRunner getRandomJetty(String slice, boolean aggressivelyKillLeaders) throws KeeperException, InterruptedException {
int numRunning = 0;
@@ -301,15 +305,15 @@ public class ChaosMonkey {
}
Random random = LuceneTestCase.random();
int chance = random.nextInt(10);
- JettySolrRunner jetty;
+ CloudJettyRunner cjetty;
if (chance <= 5 && aggressivelyKillLeaders) {
// if killLeader, really aggressively go after leaders
- jetty = shardToLeaderJetty.get(slice).jetty;
+ cjetty = shardToLeaderJetty.get(slice);
} else {
// get random shard
List<CloudJettyRunner> jetties = shardToJetty.get(slice);
int index = random.nextInt(jetties.size());
- jetty = jetties.get(index).jetty;
+ cjetty = jetties.get(index);
ZkNodeProps leader = zkStateReader.getLeaderProps(collection, slice);
boolean isLeader = leader.get(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName);
@@ -320,15 +324,16 @@ public class ChaosMonkey {
}
}
- if (jetty.getLocalPort() == -1) {
+ if (cjetty.jetty.getLocalPort() == -1) {
// we can't kill the dead
monkeyLog("abort! This guy is already dead");
return null;
}
//System.out.println("num active:" + numActive + " for " + slice + " sac:" + jetty.getLocalPort());
- monkeyLog("chose a victim! " + jetty.getLocalPort());
- return jetty;
+ monkeyLog("chose a victim! " + cjetty.jetty.getLocalPort());
+
+ return cjetty;
}
public SolrServer getRandomClient(String slice) throws KeeperException, InterruptedException {
@@ -353,7 +358,7 @@ public class ChaosMonkey {
stop = false;
new Thread() {
- private List<JettySolrRunner> deadPool = new ArrayList<JettySolrRunner>();
+ private List<CloudJettyRunner> deadPool = new ArrayList<CloudJettyRunner>();
@Override
public void run() {
@@ -364,25 +369,9 @@ public class ChaosMonkey {
if (random.nextBoolean()) {
if (!deadPool.isEmpty()) {
int index = random.nextInt(deadPool.size());
- JettySolrRunner jetty = deadPool.get(index);
- try {
- jetty.start();
- } catch (BindException e) {
- jetty.stop();
- sleep(2000);
- try {
- jetty.start();
- } catch (BindException e2) {
- jetty.stop();
- sleep(5000);
- try {
- jetty.start();
- } catch (BindException e3) {
- // we coud not get the port
- jetty.stop();
- continue;
- }
- }
+ JettySolrRunner jetty = deadPool.get(index).jetty;
+ if (!ChaosMonkey.start(jetty)) {
+ continue;
}
//System.out.println("started on port:" + jetty.getLocalPort());
deadPool.remove(index);
@@ -402,16 +391,16 @@ public class ChaosMonkey {
randomConnectionLoss();
}
- JettySolrRunner jetty;
+ CloudJettyRunner cjetty;
if (random.nextBoolean()) {
- jetty = stopRandomShard();
+ cjetty = stopRandomShard();
} else {
- jetty = killRandomShard();
+ cjetty = killRandomShard();
}
- if (jetty == null) {
+ if (cjetty == null) {
// we cannot kill
} else {
- deadPool.add(jetty);
+ deadPool.add(cjetty);
}
} catch (InterruptedException e) {
@@ -441,4 +430,31 @@ public class ChaosMonkey {
return starts.get();
}
+ public static void stop(JettySolrRunner jetty) throws Exception {
+ stopJettySolrRunner(jetty);
+ }
+
+ public static boolean start(JettySolrRunner jetty) throws Exception {
+ try {
+ jetty.start();
+ } catch (BindException e) {
+ jetty.stop();
+ Thread.sleep(2000);
+ try {
+ jetty.start();
+ } catch (BindException e2) {
+ jetty.stop();
+ Thread.sleep(5000);
+ try {
+ jetty.start();
+ } catch (BindException e3) {
+ // we coud not get the port
+ jetty.stop();
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
}
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java Tue Jul 24 05:06:00 2012
@@ -109,28 +109,30 @@ public class FullSolrCloudTest extends A
protected Map<String,SolrServer> shardToLeaderClient = new HashMap<String,SolrServer>();
protected Map<String,CloudJettyRunner> shardToLeaderJetty = new HashMap<String,CloudJettyRunner>();
- class CloudJettyRunner {
+ static class CloudJettyRunner {
JettySolrRunner jetty;
String nodeName;
String coreNodeName;
String url;
+ CloudSolrServerClient client;
}
static class CloudSolrServerClient {
- SolrServer client;
+ SolrServer solrClient;
String shardName;
+ int port;
public CloudSolrServerClient() {}
public CloudSolrServerClient(SolrServer client) {
- this.client = client;
+ this.solrClient = client;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
- result = prime * result + ((client == null) ? 0 : client.hashCode());
+ result = prime * result + ((solrClient == null) ? 0 : solrClient.hashCode());
return result;
}
@@ -140,9 +142,9 @@ public class FullSolrCloudTest extends A
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
CloudSolrServerClient other = (CloudSolrServerClient) obj;
- if (client == null) {
- if (other.client != null) return false;
- } else if (!client.equals(other.client)) return false;
+ if (solrClient == null) {
+ if (other.solrClient != null) return false;
+ } else if (!solrClient.equals(other.solrClient)) return false;
return true;
}
@@ -189,7 +191,7 @@ public class FullSolrCloudTest extends A
}
chaosMonkey = new ChaosMonkey(zkServer, zkStateReader,
- DEFAULT_COLLECTION, shardToJetty, shardToClient, shardToLeaderClient,
+ DEFAULT_COLLECTION, shardToJetty, shardToClient,
shardToLeaderJetty);
}
@@ -359,7 +361,8 @@ public class FullSolrCloudTest extends A
if (shard.getKey().contains(":" + port + "_")) {
CloudSolrServerClient csc = new CloudSolrServerClient();
- csc.client = client;
+ csc.solrClient = client;
+ csc.port = port;
csc.shardName = shard.getValue().get(ZkStateReader.NODE_NAME_PROP);
boolean isLeader = shard.getValue().containsKey(
ZkStateReader.LEADER_PROP);
@@ -409,6 +412,7 @@ public class FullSolrCloudTest extends A
cjr.nodeName = shard.getValue().get(ZkStateReader.NODE_NAME_PROP);
cjr.coreNodeName = shard.getKey();
cjr.url = shard.getValue().get(ZkStateReader.BASE_URL_PROP) + "/" + shard.getValue().get(ZkStateReader.CORE_NAME_PROP);
+ cjr.client = findClientByPort(port);
list.add(cjr);
if (isLeader) {
shardToLeaderJetty.put(slice.getKey(), cjr);
@@ -431,6 +435,16 @@ public class FullSolrCloudTest extends A
}
}
+ private CloudSolrServerClient findClientByPort(int port) {
+ Set<CloudSolrServerClient> theClients = clientToInfo.keySet();
+ for (CloudSolrServerClient client : theClients) {
+ if (client.port == port) {
+ return client;
+ }
+ }
+ throw new IllegalArgumentException("Client with the give port does not exist:" + port);
+ }
+
@Override
protected void setDistributedParams(ModifiableSolrParams params) {
@@ -678,7 +692,7 @@ public class FullSolrCloudTest extends A
query("q", "*:*", "sort", "n_tl1 desc");
// kill a shard
- JettySolrRunner deadShard = chaosMonkey.stopShard(SHARD2, 0);
+ CloudJettyRunner deadShard = chaosMonkey.stopShard(SHARD2, 0);
cloudClient.connect();
int tries = 0;
while (cloudClient.getZkStateReader().getCloudState().liveNodesContain(clientToInfo.get(new CloudSolrServerClient(shardToClient.get(SHARD2).get(0))).get(ZkStateReader.NODE_NAME_PROP))) {
@@ -690,7 +704,7 @@ public class FullSolrCloudTest extends A
// ensure shard is dead
try {
- index_specific(shardToClient.get(SHARD2).get(0), id, 999, i1, 107, t1,
+ index_specific(deadShard.client.solrClient, id, 999, i1, 107, t1,
"specific doc!");
fail("This server should be down and this update should have failed");
} catch (SolrServerException e) {
@@ -777,7 +791,7 @@ public class FullSolrCloudTest extends A
// query("q","matchesnothing","fl","*,score", "debugQuery", "true");
// this should trigger a recovery phase on deadShard
- deadShard.start(true);
+ ChaosMonkey.start(deadShard.jetty);
// make sure we have published we are recovering
Thread.sleep(1500);
@@ -791,7 +805,7 @@ public class FullSolrCloudTest extends A
// recover over 100 docs so we do more than just peer sync (replicate recovery)
- deadShard = chaosMonkey.stopShard(SHARD2, 0);
+ chaosMonkey.stopJetty(deadShard);
for (int i = 0; i < 226; i++) {
doc = new SolrInputDocument();
@@ -804,7 +818,9 @@ public class FullSolrCloudTest extends A
}
commit();
- deadShard.start(true);
+ Thread.sleep(1500);
+
+ ChaosMonkey.start(deadShard.jetty);
// make sure we have published we are recovering
Thread.sleep(1500);
@@ -1182,7 +1198,7 @@ public class FullSolrCloudTest extends A
private SolrServer getClient(String nodeName) {
for (CloudSolrServerClient client : clientToInfo.keySet()) {
if (client.shardName.equals(nodeName)) {
- return client.client;
+ return client.solrClient;
}
}
return null;
@@ -1233,7 +1249,8 @@ public class FullSolrCloudTest extends A
long count = 0;
String currentState = clientToInfo.get(new CloudSolrServerClient(client))
.get(ZkStateReader.STATE_PROP);
- if (currentState != null && currentState.equals(ZkStateReader.ACTIVE)) {
+ if (currentState != null && currentState.equals(ZkStateReader.ACTIVE) && zkStateReader.getCloudState().liveNodesContain(clientToInfo.get(new CloudSolrServerClient(client))
+ .get(ZkStateReader.NODE_NAME_PROP))) {
SolrQuery query = new SolrQuery("*:*");
query.set("distrib", false);
count = client.query(query).getResults().getNumFound();
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java Tue Jul 24 05:06:00 2012
@@ -72,7 +72,7 @@ public class RecoveryZkTest extends Full
Thread.sleep(atLeast(2000));
// bring shard replica down
- JettySolrRunner replica = chaosMonkey.stopShard("shard1", 1);
+ JettySolrRunner replica = chaosMonkey.stopShard("shard1", 1).jetty;
// wait a moment - lets allow some docs to be indexed so replication time is non 0
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java?rev=1364890&r1=1364889&r2=1364890&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java Tue Jul 24 05:06:00 2012
@@ -32,6 +32,7 @@ import org.apache.solr.client.solrj.embe
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.cloud.FullSolrCloudTest.CloudSolrServerClient;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
@@ -143,8 +144,8 @@ public class SyncSliceTest extends FullS
"to come to the aid of their country.");
// kill the leader - new leader could have all the docs or be missing one
- JettySolrRunner leaderJetty = shardToLeaderJetty.get("shard1").jetty;
- SolrServer leaderClient = shardToLeaderClient.get("shard1");
+ CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1");
+ CloudSolrServerClient leaderClient = leaderJetty.client;
Set<JettySolrRunner> jetties = new HashSet<JettySolrRunner>();
for (int i = 0; i < shardCount; i++) {
jetties.add(shardToJetty.get("shard1").get(i).jetty);
@@ -158,7 +159,7 @@ public class SyncSliceTest extends FullS
// because on some systems (especially freebsd w/ blackhole enabled), trying
// to talk to a downed node causes grief
int tries = 0;
- while (((SolrDispatchFilter) upJetty.getDispatchFilter().getFilter()).getCores().getZkController().getZkStateReader().getCloudState().liveNodesContain(clientToInfo.get(new CloudSolrServerClient(leaderClient)).get(ZkStateReader.NODE_NAME_PROP))) {
+ while (((SolrDispatchFilter) upJetty.getDispatchFilter().getFilter()).getCores().getZkController().getZkStateReader().getCloudState().liveNodesContain(clientToInfo.get(leaderClient).get(ZkStateReader.NODE_NAME_PROP))) {
if (tries++ == 120) {
fail("Shard still reported as live in zk");
}