You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/10 02:44:58 UTC
lucene-solr:jira/solr-11702: SOLR-11702: Recover
LeaderInitiatedRecoveryOnShardRestartTest
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-11702 adf6789da -> 2e26d30f9
SOLR-11702: Recover LeaderInitiatedRecoveryOnShardRestartTest
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2e26d30f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2e26d30f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2e26d30f
Branch: refs/heads/jira/solr-11702
Commit: 2e26d30f9c9bf72735dde816effa2eae399a5bd1
Parents: adf6789
Author: Cao Manh Dat <da...@apache.org>
Authored: Wed Jan 10 09:44:37 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Wed Jan 10 09:44:37 2018 +0700
----------------------------------------------------------------------
...aderInitiatedRecoveryOnShardRestartTest.java | 187 +++++++++++++++++++
1 file changed, 187 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e26d30f/solr/core/src/test/org/apache/solr/cloud/LeaderInitiatedRecoveryOnShardRestartTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderInitiatedRecoveryOnShardRestartTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderInitiatedRecoveryOnShardRestartTest.java
new file mode 100644
index 0000000..0bc0394
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderInitiatedRecoveryOnShardRestartTest.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Map;
+
+import org.apache.lucene.util.LuceneTestCase.Nightly;
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.params.CollectionParams.CollectionAction;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase;
+import org.apache.solr.update.processor.DistributingUpdateProcessorFactory;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Slow
+@Nightly
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-10071")
+//TODO remove this test on SOLR-11812
+public class LeaderInitiatedRecoveryOnShardRestartTest extends AbstractFullDistribZkTestBase {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public LeaderInitiatedRecoveryOnShardRestartTest() throws Exception {
+ super();
+ sliceCount = 1;
+ // we want 3 jetties, but we are using the control jetty as one
+ fixShardCount(2);
+ useFactory("solr.StandardDirectoryFactory");
+ }
+
+ @BeforeClass
+ public static void before() {
+ // we want a more realistic leaderVoteWait, so raise it above the test default of 10s.
+ // NOTE(review): the value set is 300000 (300s if the unit is ms), not 30s - confirm which is intended.
+ System.setProperty("leaderVoteWait", "300000");
+ }
+
+ @AfterClass
+ public static void after() {
+ System.clearProperty("leaderVoteWait");
+ }
+
+ @Test
+ public void testRestartWithAllInLIR() throws Exception {
+ // Scenario: write a "down" leader_initiated_recovery znode for core_node1..3, restart every jetty, then expire sessions; recoveries must finish each time.
+ // still waiting to be able to properly start with no default collection1,
+ // delete to remove confusion
+ waitForRecoveriesToFinish(false);
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set("action", CollectionAction.DELETE.toString());
+ params.set("name", DEFAULT_COLLECTION);
+ QueryRequest request = new QueryRequest(params);
+ request.setPath("/admin/collections");
+ String baseUrl = ((HttpSolrClient) clients.get(0)).getBaseURL();
+ HttpSolrClient delClient = getHttpSolrClient(baseUrl.substring(0, baseUrl.lastIndexOf("/")));
+ delClient.request(request);
+ delClient.close();
+
+ String testCollectionName = "all_in_lir";
+ String shardId = "shard1";
+ createCollection(testCollectionName, "conf1", 1, 3, 1);
+
+ waitForRecoveriesToFinish(testCollectionName, false);
+
+ cloudClient.setDefaultCollection(testCollectionName);
+
+ Map<String,Object> stateObj = Utils.makeMap();
+ stateObj.put(ZkStateReader.STATE_PROP, "down");
+ stateObj.put("createdByNodeName", "test");
+ stateObj.put("createdByCoreNodeName", "test");
+
+ byte[] znodeData = Utils.toJSON(stateObj);
+
+ SolrZkClient zkClient = cloudClient.getZkStateReader().getZkClient();
+ zkClient.makePath("/collections/" + testCollectionName + "/leader_initiated_recovery/" + shardId + "/core_node1", znodeData, true);
+ zkClient.makePath("/collections/" + testCollectionName + "/leader_initiated_recovery/" + shardId + "/core_node2", znodeData, true);
+ zkClient.makePath("/collections/" + testCollectionName + "/leader_initiated_recovery/" + shardId + "/core_node3", znodeData, true);
+
+ // everyone gets a couple docs so that everyone has tlog entries
+ // and won't become leader simply because they have no tlog versions
+ SolrInputDocument doc = new SolrInputDocument();
+ addFields(doc, "id", "1");
+ SolrInputDocument doc2 = new SolrInputDocument();
+ addFields(doc2, "id", "2");
+ cloudClient.add(doc);
+ cloudClient.add(doc2);
+
+ cloudClient.commit();
+
+ assertEquals("We just added 2 docs, we should be able to find them", 2, cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound());
+
+ // send one randomly chosen replica too many docs for peer sync, so that only that replica is the valid leader
+ // the versions don't matter, they just have to be higher than what the last 2 docs got
+ HttpSolrClient client = (HttpSolrClient) clients.get(random().nextInt(clients.size()));
+ client.setBaseURL(client.getBaseURL().substring(0, client.getBaseURL().lastIndexOf("/")) + "/" + testCollectionName);
+ params = new ModifiableSolrParams();
+ params.set(DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString());
+
+ try {
+ for (int i = 0; i < 101; i++) {
+ add(client, params, sdoc("id", 3 + i, "_version_", Long.MAX_VALUE - 1 - i));
+ }
+ } catch (RemoteSolrException e) {
+ // if we got a conflict it's because we tried to send a versioned doc to the leader,
+ // resend without version
+ if (e.getMessage().contains("conflict")) {
+ for (int i = 0; i < 101; i++) {
+ add(client, params, sdoc("id", 3 + i));
+ }
+ }
+ }
+
+ client.commit();
+
+ for (JettySolrRunner jetty : jettys) {
+ ChaosMonkey.stop(jetty);
+ }
+ ChaosMonkey.stop(controlJetty);
+
+ Thread.sleep(10000);
+
+ log.info("Start back up");
+
+ for (JettySolrRunner jetty : jettys) {
+ ChaosMonkey.start(jetty);
+ }
+ ChaosMonkey.start(controlJetty);
+
+ // recoveries will not finish without SOLR-8075 and SOLR-8367
+ waitForRecoveriesToFinish(testCollectionName, true);
+
+ // re-create the LIR znodes (ignoring any that already exist), then expire the session of each jetty in jettys
+ try {
+ zkClient.makePath("/collections/" + testCollectionName + "/leader_initiated_recovery/" + shardId + "/core_node1", znodeData, true);
+ } catch (NodeExistsException e) {
+ // ignored: the core_node1 LIR znode is already present
+ }
+ try {
+ zkClient.makePath("/collections/" + testCollectionName + "/leader_initiated_recovery/" + shardId + "/core_node2", znodeData, true);
+ } catch (NodeExistsException e) {
+ // ignored: the core_node2 LIR znode is already present
+ }
+ try {
+ zkClient.makePath("/collections/" + testCollectionName + "/leader_initiated_recovery/" + shardId + "/core_node3", znodeData, true);
+ } catch (NodeExistsException e) {
+ // ignored: the core_node3 LIR znode is already present
+ }
+
+ for (JettySolrRunner jetty : jettys) {
+ chaosMonkey.expireSession(jetty);
+ }
+
+ Thread.sleep(2000);
+
+ // recoveries will not finish without SOLR-8075 and SOLR-8367
+ waitForRecoveriesToFinish(testCollectionName, true);
+ }
+}