You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/10/26 07:18:51 UTC
svn commit: r1402393 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/core/
core/src/java/org/apache/solr/handler/admin/
core/src/java/org/apache/solr/update/ core/src/test/org/apache/solr/cloud/
solrj/...
Author: markrmiller
Date: Fri Oct 26 05:18:51 2012
New Revision: 1402393
URL: http://svn.apache.org/viewvc?rev=1402393&view=rev
Log:
SOLR-3995: Recovery may never finish on SolrCore shutdown if the last reference to a SolrCore is closed by the recovery process
SOLR-3994: Create more extensive tests around unloading cores.
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/CoreContainer.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrServer.java
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Oct 26 05:18:51 2012
@@ -113,6 +113,8 @@ Bug Fixes
* SOLR-3992: QuerySenderListener doesn't populate document cache.
(Shotaro Kamio, yonik)
+* SOLR-3995: Recovery may never finish on SolrCore shutdown if the last reference to
+ a SolrCore is closed by the recovery process. (Mark Miller)
Other Changes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Fri Oct 26 05:18:51 2012
@@ -17,6 +17,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.update.UpdateLog;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -38,7 +39,7 @@ import org.slf4j.LoggerFactory;
*/
public abstract class ElectionContext {
-
+ private static Logger log = LoggerFactory.getLogger(ElectionContext.class);
final String electionPath;
final ZkNodeProps leaderProps;
final String id;
@@ -58,7 +59,12 @@ public abstract class ElectionContext {
public void close() {}
public void cancelElection() throws InterruptedException, KeeperException {
- zkClient.delete(leaderSeqPath, -1, true);
+ try {
+ zkClient.delete(leaderSeqPath, -1, true);
+ } catch (NoNodeException e) {
+ // fine
+ log.warn("cancelElection did not find election node to remove");
+ }
}
abstract void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException, IOException;
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java Fri Oct 26 05:18:51 2012
@@ -84,6 +84,10 @@ public class LeaderElector {
sortSeqs(seqs);
List<Integer> intSeqs = getSeqs(seqs);
+ if (intSeqs.size() == 0) {
+ log.warn("Our node is no longer in line to be leader");
+ return;
+ }
if (seq <= intSeqs.get(0)) {
// first we delete the node advertising the old leader in case the ephem is still there
try {
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java Fri Oct 26 05:18:51 2012
@@ -313,7 +313,7 @@ public class RecoveryStrategy extends Th
}
}
- while (!successfulRecovery && !isInterrupted()) { // don't use interruption or it will close channels though
+ while (!successfulRecovery && !isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
try {
CloudDescriptor cloudDesc = core.getCoreDescriptor()
.getCloudDescriptor();
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/CoreContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/CoreContainer.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/CoreContainer.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/CoreContainer.java Fri Oct 26 05:18:51 2012
@@ -1072,7 +1072,9 @@ public class CoreContainer
synchronized(cores) {
SolrCore core = cores.remove( name );
- coreToOrigName.remove(core);
+ if (core != null) {
+ coreToOrigName.remove(core);
+ }
return core;
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Fri Oct 26 05:18:51 2012
@@ -668,7 +668,12 @@ public class CoreAdminHandler extends Re
});
}
} finally {
- if (core != null) core.close();
+ if (core != null) {
+ if (coreContainer.getZkController() != null) {
+ core.getSolrCoreState().cancelRecovery();
+ }
+ core.close();
+ }
}
return coreContainer.isPersistent();
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java Fri Oct 26 05:18:51 2012
@@ -44,7 +44,7 @@ public final class DefaultSolrCoreState
private volatile boolean recoveryRunning;
private RecoveryStrategy recoveryStrat;
- private boolean closed = false;
+ private volatile boolean closed = false;
private RefCounted<IndexWriter> refCntWriter;
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java Fri Oct 26 05:18:51 2012
@@ -67,7 +67,6 @@ import org.apache.solr.common.cloud.ZkNo
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.NamedList;
@@ -322,19 +321,19 @@ public class BasicDistributedZkTest exte
// would be better if these where all separate tests - but much, much
// slower
-// doOptimisticLockingAndUpdating();
-// testMultipleCollections();
-// testANewCollectionInOneInstance();
-// testSearchByCollectionName();
-// testANewCollectionInOneInstanceWithManualShardAssignement();
-// testNumberOfCommitsWithCommitAfterAdd();
-//
-// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
-// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
-//
-// testCollectionsAPI();
+ doOptimisticLockingAndUpdating();
+ testMultipleCollections();
+ testANewCollectionInOneInstance();
+ testSearchByCollectionName();
+ testANewCollectionInOneInstanceWithManualShardAssignement();
+ testNumberOfCommitsWithCommitAfterAdd();
+
+ testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
+ testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
+
+ testCollectionsAPI();
testCoreUnloadAndLeaders();
-
+ testUnloadLotsOfCores();
// Thread.sleep(10000000000L);
if (DEBUG) {
super.printLayout();
@@ -412,8 +411,6 @@ public class BasicDistributedZkTest exte
createCmd.setDataDir(core3dataDir);
server.request(createCmd);
- Thread.sleep(1000);
-
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
// so that we start with some versions when we reload...
@@ -474,8 +471,6 @@ public class BasicDistributedZkTest exte
createCmd.setDataDir(core4dataDir);
server.request(createCmd);
- Thread.sleep(1000);
-
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
// unload the leader again
@@ -509,9 +504,7 @@ public class BasicDistributedZkTest exte
createCmd.setCollection("unloadcollection");
createCmd.setDataDir(core1DataDir);
server.request(createCmd);
-
- Thread.sleep(1000);
-
+
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
@@ -538,7 +531,65 @@ public class BasicDistributedZkTest exte
assertEquals(found3, found4);
}
-
+
+ private void testUnloadLotsOfCores() throws Exception {
+ SolrServer client = clients.get(2);
+ String url3 = getBaseUrl(client);
+ final HttpSolrServer server = new HttpSolrServer(url3);
+
+ ThreadPoolExecutor executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE,
+ 5, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
+ new DefaultSolrThreadFactory("testExecutor"));
+ int cnt = atLeast(6);
+ for (int i = 0; i < cnt; i++) {
+ final int freezeI = i;
+ executor.execute(new Runnable() {
+
+ @Override
+ public void run() {
+ Create createCmd = new Create();
+ createCmd.setCoreName("multiunload" + freezeI);
+ createCmd.setCollection("multiunload");
+ String core3dataDir = dataDir.getAbsolutePath() + File.separator
+ + System.currentTimeMillis() + "unloadcollection" + "_3n" + freezeI;
+ createCmd.setDataDir(core3dataDir);
+ try {
+ server.request(createCmd);
+ } catch (SolrServerException e) {
+ throw new RuntimeException(e);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ });
+ }
+ executor.shutdown();
+ executor.awaitTermination(120, TimeUnit.SECONDS);
+ executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5,
+ TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
+ new DefaultSolrThreadFactory("testExecutor"));
+ for (int j = 0; j < cnt; j++) {
+ final int freezeJ = j;
+ executor.execute(new Runnable() {
+ @Override
+ public void run() {
+ Unload unloadCmd = new Unload(true);
+ unloadCmd.setCoreName("multiunload" + freezeJ);
+ try {
+ server.request(unloadCmd);
+ } catch (SolrServerException e) {
+ throw new RuntimeException(e);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ });
+ Thread.sleep(random().nextInt(50));
+ }
+ executor.shutdown();
+ executor.awaitTermination(120, TimeUnit.SECONDS);
+ }
private String getBaseUrl(SolrServer client) {
String url2 = ((HttpSolrServer) client).getBaseURL()
@@ -794,7 +845,7 @@ public class BasicDistributedZkTest exte
}
Thread.sleep(50);
}
- printLayout();
+
fail("Could not find the new collection - " + exp.code() + " : " + collectionClient.getBaseURL());
}
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrServer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrServer.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrServer.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrServer.java Fri Oct 26 05:18:51 2012
@@ -211,7 +211,6 @@ public class CloudSolrServer extends Sol
if ((sendToLeaders && leaderUrlList == null) || (!sendToLeaders
&& urlList == null)
|| clusterState.hashCode() != this.lastClusterStateHashCode) {
- System.out.println("build a new map for " + collection);
// build a map of unique nodes
// TODO: allow filtering by group, role, etc
Map<String,ZkNodeProps> nodes = new HashMap<String,ZkNodeProps>();
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java?rev=1402393&r1=1402392&r2=1402393&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java Fri Oct 26 05:18:51 2012
@@ -117,6 +117,7 @@ public class ClusterState implements JSO
*/
public Replica getShardProps(final String collection, final String coreNodeName) {
Map<String, Slice> slices = getSlices(collection);
+ if (slices == null) return null;
for(Slice slice: slices.values()) {
if(slice.getReplicasMap().get(coreNodeName)!=null) {
return slice.getReplicasMap().get(coreNodeName);