You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/12/13 02:31:11 UTC
svn commit: r1213535 - in /lucene/dev/branches/solrcloud/solr/core/src:
java/org/apache/solr/update/SolrCmdDistributor.java
test/org/apache/solr/cloud/ChaosMonkey.java
test/org/apache/solr/cloud/FullDistributedZkTest.java
Author: markrmiller
Date: Tue Dec 13 01:31:10 2011
New Revision: 1213535
URL: http://svn.apache.org/viewvc?rev=1213535&view=rev
Log:
factor chaos monkey out into its own class and start finishing the zk connection expiration test work
Added:
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (with props)
Modified:
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java?rev=1213535&r1=1213534&r2=1213535&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java Tue Dec 13 01:31:10 2011
@@ -391,8 +391,9 @@ public class SolrCmdDistributor {
"interrupted waiting for shard update response", e);
}
}
- //System.out.println("expected:" + expectedResponses + " failed:" + failed + " failedAfterConnect:" + failedAfterConnect);
-
+// if (failed > 0) {
+// System.out.println("expected:" + expectedResponses + " failed:" + failed + " failedAfterConnect:" + failedAfterConnect);
+// }
// TODO: this is a somewhat weak success guarantee - if the request was successful on every replica considered up
// and that does not return a connect exception, it was successful.
//should we optionally fail when there is only a single leader for a shard? (no replication)
Added: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1213535&view=auto
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (added)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Tue Dec 13 01:31:10 2011
@@ -0,0 +1,134 @@
+package org.apache.solr.cloud;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.cloud.FullDistributedZkTest.CloudJettyRunner;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.servlet.SolrDispatchFilter;
+
+public class ChaosMonkey {
+
+ private Map<String,List<CloudJettyRunner>> shardToJetty;
+ private ZkTestServer zkServer;
+ private ZkStateReader zkStateReader;
+ private String collection;
+ private Random random;
+
+ public ChaosMonkey(ZkTestServer zkServer, ZkStateReader zkStateReader,
+ String collection, Map<String,List<CloudJettyRunner>> shardToJetty,
+ Random random) {
+ this.shardToJetty = shardToJetty;
+ this.zkServer = zkServer;
+ this.zkStateReader = zkStateReader;
+ this.collection = collection;
+ this.random = random;
+ }
+
+ public void expireSession(CloudJettyRunner cloudJetty) {
+ SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) cloudJetty.jetty.getDispatchFilter().getFilter();
+ long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
+ zkServer.expire(sessionId);
+ }
+
+ public JettySolrRunner stopShard(String slice, int index) throws Exception {
+ JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
+ stopJetty(jetty);
+ return jetty;
+ }
+
+ private void stopJetty(JettySolrRunner jetty) throws Exception {
+ // get a clean shutdown so that no dirs are left open...
+ ((SolrDispatchFilter)jetty.getDispatchFilter().getFilter()).destroy();
+ jetty.stop();
+ }
+
+ public void stopShard(String slice) throws Exception {
+ List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+ for (CloudJettyRunner jetty : jetties) {
+ stopJetty(jetty.jetty);
+ }
+ }
+
+ public void stopShardExcept(String slice, String shardName) throws Exception {
+ List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+ for (CloudJettyRunner jetty : jetties) {
+ if (!jetty.shardName.equals(shardName)) {
+ stopJetty(jetty.jetty);
+ }
+ }
+ }
+
+ public JettySolrRunner getShard(String slice, int index) throws Exception {
+ JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
+ return jetty;
+ }
+
+ public JettySolrRunner stopRandomShard() throws Exception {
+ // add all the shards to a list
+// CloudState clusterState = zk.getCloudState();
+// for (String collection : collections) {
+// Slice theShards = zk.getCloudState().getSlices(collection);
+ return null;
+ }
+
+ public JettySolrRunner stopRandomShard(String slice) throws Exception {
+ // get latest cloud state
+ zkStateReader.updateCloudState(true);
+ Slice theShards = zkStateReader.getCloudState().getSlices(collection)
+ .get(slice);
+ int numRunning = 0;
+
+ for (CloudJettyRunner cloudJetty : shardToJetty.get(slice)) {
+ boolean running = true;
+
+ ZkNodeProps props = theShards.getShards().get(cloudJetty.shardName);
+ String state = props.get(ZkStateReader.STATE_PROP);
+ String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
+
+ if (!cloudJetty.jetty.isRunning()
+ || state.equals(ZkStateReader.RECOVERING)
+ || !zkStateReader.getCloudState().liveNodesContain(nodeName)) {
+ running = false;
+ }
+
+ if (running) {
+ numRunning++;
+ }
+ }
+
+ if (numRunning < 2) {
+ // we cannot kill anyone
+ return null;
+ }
+
+ // kill a random shard in the given slice
+ List<CloudJettyRunner> jetties = shardToJetty.get(slice);
+ int index = random.nextInt(jetties.size() - 1);
+ JettySolrRunner jetty = jetties.get(index).jetty;
+ jetty.stop();
+ return jetty;
+ }
+
+}
\ No newline at end of file
Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java?rev=1213535&r1=1213534&r2=1213535&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java Tue Dec 13 01:31:10 2011
@@ -59,6 +59,8 @@ import org.junit.BeforeClass;
*/
public class FullDistributedZkTest extends AbstractDistributedZkTestCase {
+ private static final String SHARD2 = "shard2";
+
private static final String DEFAULT_COLLECTION = "collection1";
String t1="a_t";
@@ -128,99 +130,7 @@ public class FullDistributedZkTest exten
}
- class ChaosMonkey {
- private Map<String,List<CloudJettyRunner>> shardToJetty;
-
- public ChaosMonkey(Map<String,List<CloudJettyRunner>> shardToJetty) {
- this.shardToJetty = shardToJetty;
- }
-
- public void expireSession(CloudJettyRunner cloudJetty) {
- SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) cloudJetty.jetty.getDispatchFilter().getFilter();
- long sessionId = solrDispatchFilter.getCores().getZkController().getZkClient().getSolrZooKeeper().getSessionId();
- zkServer.expire(sessionId);
- }
-
- public JettySolrRunner stopShard(String slice, int index) throws Exception {
- JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
- stopJetty(jetty);
- return jetty;
- }
-
- private void stopJetty(JettySolrRunner jetty) throws Exception {
- // get a clean shutdown so that no dirs are left open...
- ((SolrDispatchFilter)jetty.getDispatchFilter().getFilter()).destroy();
- jetty.stop();
- }
-
- public void stopShard(String slice) throws Exception {
- List<CloudJettyRunner> jetties = shardToJetty.get(slice);
- for (CloudJettyRunner jetty : jetties) {
- stopJetty(jetty.jetty);
- }
- }
-
- public void stopShardExcept(String slice, String shardName) throws Exception {
- List<CloudJettyRunner> jetties = shardToJetty.get(slice);
- for (CloudJettyRunner jetty : jetties) {
- if (!jetty.shardName.equals(shardName)) {
- stopJetty(jetty.jetty);
- }
- }
- }
-
- public JettySolrRunner getShard(String slice, int index) throws Exception {
- JettySolrRunner jetty = shardToJetty.get(slice).get(index).jetty;
- return jetty;
- }
-
- public JettySolrRunner stopRandomShard() throws Exception {
- // add all the shards to a list
-// CloudState clusterState = zk.getCloudState();
-// for (String collection : collections) {
-// Slice theShards = zk.getCloudState().getSlices(collection);
- return null;
- }
-
- public JettySolrRunner stopRandomShard(String slice) throws Exception {
- // get latest cloud state
- zkStateReader.updateCloudState(true);
- Slice theShards = zkStateReader.getCloudState().getSlices(DEFAULT_COLLECTION)
- .get(slice);
- int numRunning = 0;
-
- for (CloudJettyRunner cloudJetty : shardToJetty.get(slice)) {
- boolean running = true;
-
- ZkNodeProps props = theShards.getShards().get(cloudJetty.shardName);
- String state = props.get(ZkStateReader.STATE_PROP);
- String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
-
- if (!cloudJetty.jetty.isRunning()
- || state.equals(ZkStateReader.RECOVERING)
- || !zkStateReader.getCloudState().liveNodesContain(nodeName)) {
- running = false;
- }
-
- if (running) {
- numRunning++;
- }
- }
-
- if (numRunning < 2) {
- // we cannot kill anyone
- return null;
- }
-
- // kill random shard in shard2
- List<CloudJettyRunner> jetties = shardToJetty.get(slice);
- int index = random.nextInt(jetties.size() - 1);
- JettySolrRunner jetty = jetties.get(index).jetty;
- jetty.stop();
- return jetty;
- }
- }
@Before
@Override
@@ -228,6 +138,7 @@ public class FullDistributedZkTest exten
super.setUp();
System.setProperty("numShards", Integer.toString(sliceCount));
+ chaosMonkey = new ChaosMonkey(zkServer, zkStateReader, DEFAULT_COLLECTION, shardToJetty, random);
}
@BeforeClass
@@ -254,8 +165,6 @@ public class FullDistributedZkTest exten
// TODO: for now, turn off stress because it uses regular clients, and we
// need the cloud client because we kill servers
stress = 0;
- chaosMonkey = new ChaosMonkey(shardToJetty);
-
}
protected void initCloud() throws Exception {
@@ -433,21 +342,24 @@ public class FullDistributedZkTest exten
@Override
protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
controlClient.add(doc);
-
- boolean pick = random.nextBoolean();
-
- int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % sliceCount;
-
- if (pick && sliceCount > 1) {
- which = which + ((shardCount / sliceCount) * random.nextInt(sliceCount-1));
- }
+
+// if we wanted to randomly pick a client - but sometimes they may be down...
- CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(which);
+// boolean pick = random.nextBoolean();
+//
+// int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % sliceCount;
+//
+// if (pick && sliceCount > 1) {
+// which = which + ((shardCount / sliceCount) * random.nextInt(sliceCount-1));
+// }
+//
+// CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(which);
+
UpdateRequest ureq = new UpdateRequest();
ureq.add(doc);
//ureq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
- ureq.process(client);
+ ureq.process(cloudClient);
}
protected void index_specific(int serverNumber, Object... fields) throws Exception {
@@ -649,13 +561,12 @@ public class FullDistributedZkTest exten
query("q", "*:*", "sort", "n_tl1 desc");
// kill a shard
- JettySolrRunner deadShard = chaosMonkey.stopShard("shard2", 0);
- //JettySolrRunner deadShard2 = killShard("shard3", 1);
+ JettySolrRunner deadShard = chaosMonkey.stopShard(SHARD2, 0);
// ensure shard is dead
try {
// TODO: ignore fail
- index_specific(shardToClient.get("shard2").get(0), id, 999, i1, 107, t1,
+ index_specific(shardToClient.get(SHARD2).get(0), id, 999, i1, 107, t1,
"specific doc!");
fail("This server should be down and this update should have failed");
} catch (SolrServerException e) {
@@ -663,15 +574,15 @@ public class FullDistributedZkTest exten
}
// try to index to a living shard at shard2
- // TODO: this can fail!
- index_specific(shardToClient.get("shard2").get(1), id, 1000, i1, 108, t1,
+ // TODO: this can fail with connection refused - investigate why
+ index_specific(shardToClient.get(SHARD2).get(1), id, 1000, i1, 108, t1,
"specific doc!");
commit();
query("q", "*:*", "sort", "n_tl1 desc");
- // TMP: try adding a doc with CloudSolrServer
+ // try adding a doc with CloudSolrServer
cloudClient.setDefaultCollection(DEFAULT_COLLECTION);
SolrQuery query = new SolrQuery("*:*");
query.add("distrib", "true");
@@ -728,7 +639,7 @@ public class FullDistributedZkTest exten
waitForRecovery(deadShard);
- List<SolrServer> s2c = shardToClient.get("shard2");
+ List<SolrServer> s2c = shardToClient.get(SHARD2);
// if we properly recovered, we should now have the couple missing docs that
// came in while shard was down
@@ -749,17 +660,11 @@ public class FullDistributedZkTest exten
if (VERBOSE) System.out.println("total:" + client.query(new SolrQuery("*:*")).getResults().getNumFound());
}
- // assert the new server has the same number of docs as another server in
- // that shard
- // TODO: make a new call that checks each shard in slice has equal docs
- assertEquals(shardToClient.get("shard1").get(0).query(new SolrQuery("*:*"))
- .getResults().getNumFound(),
- shardToClient.get("shard1").get(shardToClient.get("shard1").size() - 1)
- .query(new SolrQuery("*:*")).getResults().getNumFound());
+ checkShardConsistency(SHARD2);
assertDocCounts();
- String docId = "99999999";
+ long docId = 99999999L;
indexr("id", docId, t1, "originalcontent");
commit();
@@ -786,7 +691,7 @@ public class FullDistributedZkTest exten
UpdateRequest uReq = new UpdateRequest();
//uReq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
- uReq.deleteById(docId).process(clients.get(0));
+ uReq.deleteById(Long.toString(docId)).process(clients.get(0));
commit();
@@ -806,19 +711,43 @@ public class FullDistributedZkTest exten
// TODO: bring this to it's own method?
// try indexing to a leader that has no replicas up
- ZkNodeProps leaderProps = zkStateReader.getLeaderProps(DEFAULT_COLLECTION, "shard2");
+ ZkNodeProps leaderProps = zkStateReader.getLeaderProps(DEFAULT_COLLECTION, SHARD2);
String nodeName = leaderProps.get(ZkStateReader.NODE_NAME_PROP);
- chaosMonkey.stopShardExcept("shard2", nodeName);
+ chaosMonkey.stopShardExcept(SHARD2, nodeName);
SolrServer client = getClient(nodeName);
System.out.println("what happens here?");
index_specific(client, "id", docId + 1, t1, "what happens here?");
+
// expire a session...
- //CloudJettyRunner cloudJetty = shardToJetty.get("shard1").get(0);
- //chaosMonkey.expireSession(cloudJetty);
+ CloudJettyRunner cloudJetty = shardToJetty.get("shard1").get(0);
+ chaosMonkey.expireSession(cloudJetty);
+
+ indexr("id", docId + 1, t1, "slip this doc in");
+
+ waitForRecovery(cloudJetty.jetty);
+ checkShardConsistency("shard1");
+
+ }
+
+ private void checkShardConsistency(String shard) throws SolrServerException {
+ List<SolrServer> solrClients = shardToClient.get(shard);
+ long num = -1;
+ long lastNum = -1;
+ for (SolrServer client : solrClients) {
+ num = client.query(new SolrQuery("*:*")).getResults().getNumFound();
+ if (lastNum > -1 && lastNum != num) {
+ fail("shard is not consistent, expected:" + lastNum + " and got:" + num);
+ }
+ lastNum = num;
+ }
+ assertEquals(shardToClient.get("shard1").get(0).query(new SolrQuery("*:*"))
+ .getResults().getNumFound(),
+ shardToClient.get("shard1").get(shardToClient.get("shard1").size() - 1)
+ .query(new SolrQuery("*:*")).getResults().getNumFound());
}
private SolrServer getClient(String nodeName) {