You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2018/04/13 22:38:34 UTC

lucene-solr:master: SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader

Repository: lucene-solr
Updated Branches:
  refs/heads/master e4eb8a870 -> 93f9a65b1


SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/93f9a65b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/93f9a65b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/93f9a65b

Branch: refs/heads/master
Commit: 93f9a65b1c8aa460489fdce50ed84d18168b53ef
Parents: e4eb8a8
Author: Varun Thacker <va...@apache.org>
Authored: Fri Apr 13 12:07:42 2018 -0700
Committer: Varun Thacker <va...@apache.org>
Committed: Fri Apr 13 15:38:26 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 ++
 .../solr/handler/CdcrReplicatorManager.java     | 29 +++++++++++++++
 .../solr/cloud/cdcr/CdcrBootstrapTest.java      |  5 +--
 .../apache/solr/cloud/cdcr/CdcrTestsUtil.java   | 38 ++++++++++++++++++++
 4 files changed, 73 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index de748be..b39fd6f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -156,6 +156,9 @@ Bug Fixes
 * SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker)
 
 * SOLR-10513: ConjunctionSolrSpellChecker did not work with LuceneLevenshteinDistance (Amrit Sarkar via James Dyer)
+
+* SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader
+  (Amrit Sarkar, Varun Thacker)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
index abd6ed7..8ec3c8b 100644
--- a/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
+++ b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
@@ -20,6 +20,7 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
@@ -36,11 +37,14 @@ import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ExecutorUtil;
@@ -298,6 +302,8 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
                 checkpoint, collectionName, shard);
             CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
             reader1.seek(checkpoint);
+            // issue asynchronous request_recovery to the follower nodes of the shards of target collection
+            sendRequestRecoveryToFollowers(state);
             success = true;
             break;
           } else if (status == BootstrapStatus.FAILED) {
@@ -411,6 +417,29 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
     return client.request(request);
   }
 
+  private void sendRequestRecoveryToFollowers(CdcrReplicatorState state) throws SolrServerException, IOException {
+    Collection<Slice> slices = state.getClient().getZkStateReader().getClusterState().getCollection(state.getTargetCollection()).getActiveSlices();
+    for (Slice slice : slices) {
+      Collection<Replica> replicas = slice.getReplicas();
+      for (Replica replica : replicas) {
+        if (slice.getLeader().getCoreName().equals(replica.getCoreName())) {
+          continue; // no need to request recovery for leader
+        }
+        sendRequestRecoveryToFollower(state.getClient(), replica.getCoreName());
+        log.info("RequestRecovery cmd is issued by core: " + replica.getCoreName() + " of shard: " + slice.getName() +
+            "for target: " + state.getTargetCollection());
+      }
+    }
+  }
+
+  private NamedList sendRequestRecoveryToFollower(SolrClient client, String coreName) throws SolrServerException, IOException {
+    CoreAdminRequest.RequestRecovery recoverRequestCmd = new CoreAdminRequest.RequestRecovery();
+    recoverRequestCmd.setAction(CoreAdminParams.CoreAdminAction.REQUESTRECOVERY);
+    recoverRequestCmd.setCoreName(coreName);
+    return client.request(recoverRequestCmd);
+  }
+
+
   private enum BootstrapStatus  {
     SUBMITTED,
     RUNNING,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
index cae9855..543bd5c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
@@ -105,7 +105,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
 
         // setup the target cluster
         target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
-        CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1)
+        CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 2)
+            .setMaxShardsPerNode(2)
             .process(target.getSolrClient());
         CloudSolrClient targetSolrClient = target.getSolrClient();
         targetSolrClient.setDefaultCollection("cdcr-target");
@@ -118,6 +119,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
         log.info("Cdcr queue response: " + response.getResponse());
         long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient);
         assertEquals("Document mismatch on target after sync", numDocs, foundDocs);
+        assertTrue(CdcrTestsUtil.assertShardInSync("cdcr-target", "shard1", targetSolrClient)); // with more than 1 replica
 
         params = new ModifiableSolrParams();
         params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString());
@@ -300,5 +302,4 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
       target.shutdown();
     }
   }
-
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
index 99aa471..6a186fd 100644
--- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
+++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
@@ -26,11 +26,17 @@ import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.handler.CdcrParams;
+import org.apache.solr.util.TimeOut;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -108,4 +114,36 @@ public class CdcrTestsUtil extends SolrTestCaseJ4{
     }
     return response != null ? response.getResults().getNumFound() : 0;
   }
+
+  protected static boolean assertShardInSync(String collection, String shard, CloudSolrClient client) throws IOException, SolrServerException {
+    TimeOut waitTimeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+    DocCollection docCollection = client.getZkStateReader().getClusterState().getCollection(collection);
+    Slice correctSlice = null;
+    for (Slice slice : docCollection.getSlices()) {
+      if (shard.equals(slice.getName())) {
+        correctSlice = slice;
+        break;
+      }
+    }
+    assertNotNull(correctSlice);
+
+    long leaderDocCount;
+    try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(correctSlice.getLeader().getCoreUrl()).withHttpClient(client.getHttpClient()).build()) {
+      leaderDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound();
+    }
+
+    while (!waitTimeOut.hasTimedOut()) {
+      int replicasInSync = 0;
+      for (Replica replica : correctSlice.getReplicas()) {
+        try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(replica.getCoreUrl()).withHttpClient(client.getHttpClient()).build()) {
+          long replicaDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound();
+          if (replicaDocCount == leaderDocCount) replicasInSync++;
+        }
+      }
+      if (replicasInSync == correctSlice.getReplicas().size()) {
+        return true;
+      }
+    }
+    return false;
+  }
 }