You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2018/04/02 07:38:07 UTC
[20/34] lucene-solr:jira/solr-12095: SOLR-12066: Cleanup deleted core when node start
SOLR-12066: Cleanup deleted core when node start
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/35bfe897
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/35bfe897
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/35bfe897
Branch: refs/heads/jira/solr-12095
Commit: 35bfe897901f1b51bce654b49aecd9560bfa797f
Parents: d483108
Author: Cao Manh Dat <da...@apache.org>
Authored: Fri Mar 30 20:11:39 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Fri Mar 30 20:11:39 2018 +0700
----------------------------------------------------------------------
solr/CHANGES.txt | 2 ++
.../org/apache/solr/cloud/ZkController.java | 22 ++++++++++---
.../org/apache/solr/core/CoreContainer.java | 7 ++++-
.../apache/solr/cloud/DeleteReplicaTest.java | 33 ++++++++++++++++++++
4 files changed, 59 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/35bfe897/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5854e0f..12bc25a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -110,6 +110,8 @@ Optimizations
* SOLR-12146: LIR should skip deleted replicas (Cao Manh Dat)
+* SOLR-12066: Cleanup deleted core when node start (Cao Manh Dat)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/35bfe897/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c0ddd26..872a8b9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1661,6 +1661,9 @@ public class ZkController {
Thread.currentThread().interrupt();
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
+ } catch (NotInClusterStateException e) {
+ // rethrow as-is, without logging or wrapping it here, to keep the stack trace less verbose
+ throw e;
} catch (Exception e) {
log.error("", e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "", e);
@@ -1688,7 +1691,7 @@ public class ZkController {
return true;
}
- private void checkStateInZk(CoreDescriptor cd) throws InterruptedException {
+ private void checkStateInZk(CoreDescriptor cd) throws InterruptedException, NotInClusterStateException {
if (!Overseer.isLegacy(zkStateReader)) {
CloudDescriptor cloudDesc = cd.getCloudDescriptor();
String nodeName = cloudDesc.getCoreNodeName();
@@ -1722,7 +1725,8 @@ public class ZkController {
}
Replica replica = slice.getReplica(coreNodeName);
if (replica == null) {
- errorMessage.set("coreNodeName " + coreNodeName + " does not exist in shard " + cloudDesc.getShardId());
+ errorMessage.set("coreNodeName " + coreNodeName + " does not exist in shard " + cloudDesc.getShardId() +
+ ", ignore the exception if the replica was deleted");
return false;
}
return true;
@@ -1730,8 +1734,9 @@ public class ZkController {
} catch (TimeoutException e) {
String error = errorMessage.get();
if (error == null)
- error = "Replica " + coreNodeName + " is not present in cluster state";
- throw new SolrException(ErrorCode.SERVER_ERROR, error + ": " + collectionState.get());
+ error = "coreNodeName " + coreNodeName + " does not exist in shard " + cloudDesc.getShardId() +
+ ", ignore the exception if the replica was deleted";
+ throw new NotInClusterStateException(ErrorCode.SERVER_ERROR, error);
}
}
}
@@ -2711,6 +2716,15 @@ public class ZkController {
}
}
+ /**
+ * Thrown during the pre-register process when the replica is not present in the cluster state.
+ */
+ public static class NotInClusterStateException extends SolrException {
+ public NotInClusterStateException(ErrorCode code, String msg) {
+ super(code, msg);
+ }
+ }
+
public boolean checkIfCoreNodeNameAlreadyExists(CoreDescriptor dcore) {
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(dcore.getCollectionName());
if (collection != null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/35bfe897/solr/core/src/java/org/apache/solr/core/CoreContainer.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index b667bc0..74b718c 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -677,7 +677,7 @@ public class CoreContainer {
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
} catch (ExecutionException e) {
- log.error("Error waiting for SolrCore to be created", e);
+ log.error("Error waiting for SolrCore to be loaded on startup", e.getCause());
}
}
} finally {
@@ -1063,6 +1063,11 @@ public class CoreContainer {
return core;
} catch (Exception e) {
coreInitFailures.put(dcore.getName(), new CoreLoadFailure(dcore, e));
+ if (e instanceof ZkController.NotInClusterStateException && !newCollection) {
+ // this mostly happens when the core was deleted while this node was down
+ unload(dcore.getName(), true, true, true);
+ throw e;
+ }
solrCores.removeCoreDescriptor(dcore);
final SolrException solrException = new SolrException(ErrorCode.SERVER_ERROR, "Unable to create core [" + dcore.getName() + "]", e);
if(core != null && !core.isClosed())
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/35bfe897/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
index 3208ebd..1a021d7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
@@ -41,7 +41,10 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.core.SolrCore;
import org.apache.solr.core.ZkContainer;
+import org.apache.solr.util.FileUtils;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.junit.BeforeClass;
@@ -153,6 +156,36 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
}
@Test
+ public void deleteReplicaOnDownNode() throws Exception {
+ final String collectionName = "deleteReplicaOnDownNode";
+ CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2).process(cluster.getSolrClient());
+ waitForState("Expected one shards with two replicas", collectionName, clusterShape(1, 2));
+
+ Slice shard = getCollectionState(collectionName).getSlice("shard1");
+ Replica replica = shard.getReplicas(rep -> !rep.getName().equals(shard.getLeader().getName())).get(0);
+ JettySolrRunner replicaJetty = getJettyForReplica(replica);
+ CoreDescriptor replicaCd;
+ try (SolrCore core = replicaJetty.getCoreContainer().getCore(replica.getCoreName())) {
+ replicaCd = core.getCoreDescriptor();
+ }
+ assertNotNull("Expected core descriptor of "+ replica.getName() + " is not null",replicaCd);
+ String replicaJettyNodeName = replicaJetty.getNodeName();
+
+ // shut down the node hosting the non-leader replica, then delete that replica while the node is down
+ replicaJetty.stop();
+ waitForNodeLeave(replicaJettyNodeName);
+ waitForState("Expected one shards with one replica", collectionName, clusterShape(1, 1));
+ CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName()).process(cluster.getSolrClient());
+ waitForState("Expected only one replica left", collectionName, (liveNodes, collectionState) -> collectionState.getReplicas().size() == 1);
+
+ // restart the node and wait (up to 60 seconds) until the deleted replica's instance dir and data dir are gone
+ replicaJetty.start();
+ TimeOut timeOut = new TimeOut(60, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+ timeOut.waitFor("Expected data dir and instance dir of " + replica.getName() + " is deleted", ()
+ -> !Files.exists(replicaCd.getInstanceDir()) && !FileUtils.fileExists(replicaCd.getDataDir()));
+ }
+
+ @Test
public void deleteReplicaByCountForAllShards() throws Exception {
final String collectionName = "deleteByCountNew";