You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/09/14 23:49:21 UTC
svn commit: r1384937 [2/2] - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/test-framework/
lucene/test-framework/src/java/org/apache/lucene/util/ solr/ solr/core/
solr/core/src/java/org/apache/solr/cloud/
solr/core/src/java/org/apache/solr/core/...
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java Fri Sep 14 21:49:19 2012
@@ -36,6 +36,7 @@ import org.apache.solr.common.cloud.ZkCo
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.update.SolrCmdDistributor.Node;
import org.apache.solr.update.SolrCmdDistributor.Response;
@@ -43,16 +44,13 @@ import org.apache.solr.update.SolrCmdDis
import org.apache.solr.util.DefaultSolrThreadFactory;
public class SolrCmdDistributorTest extends BaseDistributedSearchTestCase {
- private ThreadPoolExecutor executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5,
- TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
- new DefaultSolrThreadFactory("cmdDistribExecutor"));
+ private static ThreadPoolExecutor executor;
public SolrCmdDistributorTest() {
fixShardCount = true;
shardCount = 4;
stress = 0;
}
-
public static String getSchemaFile() {
return "schema.xml";
@@ -91,7 +89,7 @@ public class SolrCmdDistributorTest exte
public void doTest() throws Exception {
del("*:*");
- SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(8, executor);
+ SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(5, executor);
ModifiableSolrParams params = new ModifiableSolrParams();
List<Node> nodes = new ArrayList<Node>();
@@ -125,7 +123,7 @@ public class SolrCmdDistributorTest exte
nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
// add another 2 docs to control and 3 to client
- cmdDistrib = new SolrCmdDistributor(8, executor);
+ cmdDistrib = new SolrCmdDistributor(5, executor);
cmd.solrDoc = sdoc("id", 2);
cmdDistrib.distribAdd(cmd, nodes, params);
@@ -158,7 +156,7 @@ public class SolrCmdDistributorTest exte
DeleteUpdateCommand dcmd = new DeleteUpdateCommand(null);
dcmd.id = "2";
- cmdDistrib = new SolrCmdDistributor(8, executor);
+ cmdDistrib = new SolrCmdDistributor(5, executor);
cmdDistrib.distribDelete(dcmd, nodes, params);
cmdDistrib.distribCommit(ccmd, nodes, params);
@@ -177,19 +175,16 @@ public class SolrCmdDistributorTest exte
.getNumFound();
assertEquals(results.toString(), 2, numFound);
- // debug stuff
for (SolrServer c : clients) {
c.optimize();
- // distrib optimize is not working right yet, so call it on each client
//System.out.println(clients.get(0).request(new LukeRequest()));
}
int id = 5;
- cmdDistrib = new SolrCmdDistributor(8, executor);
+ cmdDistrib = new SolrCmdDistributor(5, executor);
- nodes.clear();
- int cnt = atLeast(200);
+ int cnt = atLeast(201);
for (int i = 0; i < cnt; i++) {
nodes.clear();
for (SolrServer c : clients) {
@@ -199,25 +194,52 @@ public class SolrCmdDistributorTest exte
HttpSolrServer httpClient = (HttpSolrServer) c;
nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
httpClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
+
nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
}
+ AddUpdateCommand c = new AddUpdateCommand(null);
+ c.solrDoc = sdoc("id", id++);
+ if (nodes.size() > 0) {
+ cmdDistrib.distribAdd(c, nodes, params);
+ }
+ }
+
+ nodes.clear();
+
+ for (SolrServer c : clients) {
+ HttpSolrServer httpClient = (HttpSolrServer) c;
+ nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
+ httpClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
- cmd.solrDoc = sdoc("id", id++);
- cmdDistrib.distribAdd(cmd, nodes, params);
+ nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
}
- cmdDistrib.finish();
-
cmdDistrib.distribCommit(ccmd, nodes, params);
+
+ cmdDistrib.finish();
+
for (SolrServer c : clients) {
NamedList<Object> resp = c.request(new LukeRequest());
- System.out.println(resp);
assertEquals("SOLR-3428: We only did adds - there should be no deletes",
((NamedList<Object>) resp.get("index")).get("numDocs"),
((NamedList<Object>) resp.get("index")).get("maxDoc"));
}
}
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ executor = new ThreadPoolExecutor(0, 5 * 16, 5,
+ TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
+ new DefaultSolrThreadFactory("cmdDistribExecutor"));
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ ExecutorUtil.shutdownNowAndAwaitTermination(executor);
+ super.tearDown();
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java (original)
+++ lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java Fri Sep 14 21:49:19 2012
@@ -120,7 +120,6 @@ public class CoreAdminRequest extends So
protected String coreNodeName;
protected String state;
protected Boolean checkLive;
- protected Integer pauseFor;
protected Boolean onlyIfLeader;
@@ -160,14 +159,6 @@ public class CoreAdminRequest extends So
this.checkLive = checkLive;
}
- public Integer getPauseFor() {
- return pauseFor;
- }
-
- public void setPauseFor(Integer pauseFor) {
- this.pauseFor = pauseFor;
- }
-
public boolean isOnlyIfLeader() {
return onlyIfLeader;
}
@@ -202,10 +193,6 @@ public class CoreAdminRequest extends So
params.set( "checkLive", checkLive);
}
- if (pauseFor != null) {
- params.set( "pauseFor", pauseFor);
- }
-
if (onlyIfLeader != null) {
params.set( "onlyIfLeader", onlyIfLeader);
}
Modified: lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java (original)
+++ lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Fri Sep 14 21:49:19 2012
@@ -410,7 +410,7 @@ public class ZkStateReader {
}
Thread.sleep(50);
}
- throw new RuntimeException("No registered leader was found, collection:" + collection + " slice:" + shard);
+ throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No registered leader was found, collection:" + collection + " slice:" + shard);
}
/**
Modified: lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java (original)
+++ lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java Fri Sep 14 21:49:19 2012
@@ -20,7 +20,6 @@ package org.apache.solr.common.util;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
-import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,24 +27,38 @@ import org.slf4j.LoggerFactory;
public class ExecutorUtil {
public static Logger log = LoggerFactory.getLogger(ExecutorUtil.class);
- public static void shutdownAndAwaitTermination(ExecutorService pool) {
+ public static void shutdownNowAndAwaitTermination(ExecutorService pool) {
pool.shutdown(); // Disable new tasks from being submitted
pool.shutdownNow(); // Cancel currently executing tasks
- try {
- // Wait a while for existing tasks to terminate
- if (!pool.awaitTermination(60, TimeUnit.SECONDS))
- SolrException.log(log, "Executor still has running tasks.");
- } catch (InterruptedException ie) {
- // (Re-)Cancel if current thread also interrupted
- pool.shutdownNow();
+ boolean shutdown = false;
+ while (!shutdown) {
+ try {
+ // Wait a while for existing tasks to terminate
+ shutdown = pool.awaitTermination(5, TimeUnit.SECONDS);
+ } catch (InterruptedException ie) {
+ // Preserve interrupt status
+ Thread.currentThread().interrupt();
+ }
+ if (!shutdown) {
+ pool.shutdownNow(); // Cancel currently executing tasks
+ }
+ }
+ }
+
+ public static void shutdownAndAwaitTermination(ExecutorService pool) {
+ pool.shutdown(); // Disable new tasks from being submitted
+ boolean shutdown = false;
+ while (!shutdown) {
try {
- if (!pool.awaitTermination(60, TimeUnit.SECONDS))
- SolrException.log(log, "Executor still has running tasks.");
- } catch (InterruptedException e) {
-
+ // Wait a while for existing tasks to terminate
+ shutdown = pool.awaitTermination(60, TimeUnit.SECONDS);
+ } catch (InterruptedException ie) {
+ // Preserve interrupt status
+ Thread.currentThread().interrupt();
+ }
+ if (!shutdown) {
+ pool.shutdownNow(); // Cancel currently executing tasks
}
- // Preserve interrupt status
- Thread.currentThread().interrupt();
}
}
}
Modified: lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java (original)
+++ lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java Fri Sep 14 21:49:19 2012
@@ -242,7 +242,7 @@ public abstract class AbstractFullDistri
server.getLbServer().getHttpClient().getParams()
.setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 5000);
server.getLbServer().getHttpClient().getParams()
- .setParameter(CoreConnectionPNames.SO_TIMEOUT, 40000);
+ .setParameter(CoreConnectionPNames.SO_TIMEOUT, 20000);
cloudClient = server;
} catch (MalformedURLException e) {
throw new RuntimeException(e);
@@ -891,7 +891,7 @@ public abstract class AbstractFullDistri
cnt += results;
break;
}
- } catch (SolrServerException e) {
+ } catch (Exception e) {
// if we have a problem, try the next one
if (i == times - 1) {
throw e;
Modified: lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java Fri Sep 14 21:49:19 2012
@@ -32,6 +32,7 @@ import org.apache.solr.common.cloud.Solr
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
import org.apache.solr.servlet.SolrDispatchFilter;
import org.apache.zookeeper.KeeperException;
import org.eclipse.jetty.servlet.FilterHolder;
@@ -43,13 +44,16 @@ import org.slf4j.LoggerFactory;
*
* It can also run in a background thread and start and stop jetties
* randomly.
- *
+ * TODO: expire multiple sessions / connectionloss at once
+ * TODO: kill multiple jetties at once
+ * TODO: ? add random headhunter mode that always kills the leader
+ * TODO: chaosmonkey should be able to do cluster stop/start tests
*/
public class ChaosMonkey {
private static Logger log = LoggerFactory.getLogger(ChaosMonkey.class);
- private static final int CONLOSS_PERCENT = 3; //30%
- private static final int EXPIRE_PERCENT = 4; //40%
+ private static final int CONLOSS_PERCENT = 10; // 0 - 10 = 0 - 100%
+ private static final int EXPIRE_PERCENT = 10; // 0 - 10 = 0 - 100%
private Map<String,List<CloudJettyRunner>> shardToJetty;
private ZkTestServer zkServer;
@@ -79,22 +83,50 @@ public class ChaosMonkey {
this.zkStateReader = zkStateReader;
this.collection = collection;
Random random = LuceneTestCase.random();
- expireSessions = random.nextBoolean();
- causeConnectionLoss = random.nextBoolean();
+ expireSessions = true; //= random.nextBoolean();
+
+ causeConnectionLoss = true;//= random.nextBoolean();
monkeyLog("init - expire sessions:" + expireSessions
+ " cause connection loss:" + causeConnectionLoss);
}
- public void expireSession(JettySolrRunner jetty) {
+ // TODO: expire all clients at once?
+ public void expireSession(final JettySolrRunner jetty) {
monkeyLog("expire session for " + jetty.getLocalPort() + " !");
- SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) jetty.getDispatchFilter().getFilter();
+
+ SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) jetty
+ .getDispatchFilter().getFilter();
if (solrDispatchFilter != null) {
CoreContainer cores = solrDispatchFilter.getCores();
if (cores != null) {
- long sessionId = cores.getZkController().getZkClient().getSolrZooKeeper().getSessionId();
- zkServer.expire(sessionId);
+ causeConnectionLoss(jetty, cores.getZkController().getClientTimeout() + 200);
}
}
+
+
+// Thread thread = new Thread() {
+// {
+// setDaemon(true);
+// }
+// public void run() {
+// SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) jetty.getDispatchFilter().getFilter();
+// if (solrDispatchFilter != null) {
+// CoreContainer cores = solrDispatchFilter.getCores();
+// if (cores != null) {
+// try {
+// Thread.sleep(ZkTestServer.TICK_TIME * 2 + 800);
+// } catch (InterruptedException e) {
+// // we act as only connection loss
+// return;
+// }
+// long sessionId = cores.getZkController().getZkClient().getSolrZooKeeper().getSessionId();
+// zkServer.expire(sessionId);
+// }
+// }
+// }
+// };
+// thread.start();
+
}
public void expireRandomSession() throws KeeperException, InterruptedException {
@@ -119,6 +151,10 @@ public class ChaosMonkey {
}
private void causeConnectionLoss(JettySolrRunner jetty) {
+ causeConnectionLoss(jetty, ZkTestServer.TICK_TIME * 2 + 200);
+ }
+
+ private void causeConnectionLoss(JettySolrRunner jetty, int pauseTime) {
SolrDispatchFilter solrDispatchFilter = (SolrDispatchFilter) jetty
.getDispatchFilter().getFilter();
if (solrDispatchFilter != null) {
@@ -126,7 +162,7 @@ public class ChaosMonkey {
if (cores != null) {
SolrZkClient zkClient = cores.getZkController().getZkClient();
// must be at least double tick time...
- zkClient.getSolrZooKeeper().pauseCnxn(ZkTestServer.TICK_TIME * 2 + 200);
+ zkClient.getSolrZooKeeper().pauseCnxn(pauseTime);
}
}
}
@@ -291,6 +327,7 @@ public class ChaosMonkey {
}
}
+ // TODO: stale state makes this a tough call
if (numActive < 2) {
// we cannot kill anyone
monkeyLog("only one active node in shard - monkey cannot kill :(");
@@ -308,8 +345,44 @@ public class ChaosMonkey {
int index = random.nextInt(jetties.size());
cjetty = jetties.get(index);
- ZkNodeProps leader = zkStateReader.getLeaderProps(collection, slice);
- boolean isLeader = leader.getStr(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName);
+ ZkNodeProps leader = null;
+ try {
+ leader = zkStateReader.getLeaderProps(collection, slice);
+ } catch (Throwable t) {
+ log.error("Could not get leader", t);
+ return null;
+ }
+
+ FilterHolder fh = cjetty.jetty.getDispatchFilter();
+ if (fh == null) {
+ monkeyLog("selected jetty not running correctly - skip");
+ return null;
+ }
+ SolrDispatchFilter df = ((SolrDispatchFilter) fh.getFilter());
+ if (df == null) {
+ monkeyLog("selected jetty not running correctly - skip");
+ return null;
+ }
+ CoreContainer cores = df.getCores();
+ if (cores == null) {
+ monkeyLog("selected jetty not running correctly - skip");
+ return null;
+ }
+ SolrCore core = cores.getCore(leader.getStr(ZkStateReader.CORE_NAME_PROP));
+ if (core == null) {
+ monkeyLog("selected jetty not running correctly - skip");
+ return null;
+ }
+ // cluster state can be stale - also go by our 'near real-time' is leader prop
+ boolean rtIsLeader;
+ try {
+ rtIsLeader = core.getCoreDescriptor().getCloudDescriptor().isLeader();
+ } finally {
+ core.close();
+ }
+
+ boolean isLeader = leader.getStr(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName)
+ || rtIsLeader;
if (!aggressivelyKillLeaders && isLeader) {
// we don't kill leaders...
monkeyLog("abort! I don't kill leaders");
@@ -343,7 +416,7 @@ public class ChaosMonkey {
// synchronously starts and stops shards randomly, unless there is only one
// active shard up for a slice or if there is one active and others recovering
- public void startTheMonkey(boolean killLeaders, final int roundPause) {
+ public void startTheMonkey(boolean killLeaders, final int roundPauseUpperLimit) {
monkeyLog("starting");
this.aggressivelyKillLeaders = killLeaders;
startTime = System.currentTimeMillis();
@@ -357,8 +430,9 @@ public class ChaosMonkey {
public void run() {
while (!stop) {
try {
- Thread.sleep(roundPause);
+
Random random = LuceneTestCase.random();
+ Thread.sleep(random.nextInt(roundPauseUpperLimit));
if (random.nextBoolean()) {
if (!deadPool.isEmpty()) {
int index = random.nextInt(deadPool.size());
Modified: lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java?rev=1384937&r1=1384936&r2=1384937&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java (original)
+++ lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java Fri Sep 14 21:49:19 2012
@@ -43,7 +43,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ZkTestServer {
- public static final int TICK_TIME = 3000;
+ public static final int TICK_TIME = 1000;
private static Logger log = LoggerFactory.getLogger(ZkTestServer.class);