You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2018/03/16 21:04:49 UTC

lucene-solr:branch_7x: SOLR-12083: Fix PeerSync, Leader Election failures and CDCR checkpoint inconsistencies on a cluster running CDCR

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x ed3ff7e20 -> 033afbfbb


SOLR-12083: Fix PeerSync, Leader Election failures and CDCR checkpoint inconsistencies on a cluster running CDCR

(cherry picked from commit c4d0223)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/033afbfb
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/033afbfb
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/033afbfb

Branch: refs/heads/branch_7x
Commit: 033afbfbbbbaad0fc0b0a48967765cddf9e2b455
Parents: ed3ff7e
Author: Varun Thacker <va...@apache.org>
Authored: Fri Mar 16 12:57:10 2018 -0700
Committer: Varun Thacker <va...@apache.org>
Committed: Fri Mar 16 14:04:34 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../java/org/apache/solr/update/UpdateLog.java  |  9 +++--
 .../solr/cloud/cdcr/CdcrRequestHandlerTest.java | 39 ++++++++++++++++----
 .../org/apache/solr/search/TestRecovery.java    |  1 +
 .../apache/solr/search/TestStressRecovery.java  |  1 +
 5 files changed, 42 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/033afbfb/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b2514ef..a4233fd 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -307,6 +307,8 @@ Bug Fixes
 * SOLR-12083: Fix RealTime GET to work on a cluster running CDCR when using Solr's in-place updates
   (Amrit Sarkar, Varun Thacker)
 
+* SOLR-12083: Fix PeerSync, Leader Election failures and CDCR checkpoint inconsistencies on a cluster running CDCR
+  (Amrit Sarkar, Varun Thacker)
 
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/033afbfb/solr/core/src/java/org/apache/solr/update/UpdateLog.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
index 71670bc..2d6fd98 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
@@ -1431,8 +1431,11 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
                   update.pointer = reader.position();
                   update.version = version;
 
-                  if (oper == UpdateLog.UPDATE_INPLACE && entry.size() == 5) {
-                    update.previousVersion = (Long) entry.get(UpdateLog.PREV_VERSION_IDX);
+                  if (oper == UpdateLog.UPDATE_INPLACE) {
+                    if ((update.log instanceof CdcrTransactionLog && entry.size() == 6) ||
+                        (!(update.log instanceof CdcrTransactionLog) && entry.size() == 5)) {
+                      update.previousVersion = (Long) entry.get(UpdateLog.PREV_VERSION_IDX);
+                    }
                   }
                   updatesForLog.add(update);
                   updates.put(version, update);
@@ -1440,7 +1443,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
                   if (oper == UpdateLog.DELETE_BY_QUERY) {
                     deleteByQueryList.add(update);
                   } else if (oper == UpdateLog.DELETE) {
-                    deleteList.add(new DeleteUpdate(version, (byte[])entry.get(entry.size()-1)));
+                    deleteList.add(new DeleteUpdate(version, (byte[])entry.get(2)));
                   }
 
                   break;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/033afbfb/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java
index 237cc58..e12c693 100644
--- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java
@@ -16,7 +16,10 @@
  */
 package org.apache.solr.cloud.cdcr;
 
+import java.util.Arrays;
+import com.google.common.collect.ImmutableMap;
 import org.apache.lucene.util.LuceneTestCase.Nightly;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.handler.CdcrParams;
 import org.junit.Test;
@@ -69,7 +72,7 @@ public class CdcrRequestHandlerTest extends BaseCdcrDistributedZkTest {
     NamedList rsp = invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.COLLECTIONCHECKPOINT);
     assertEquals(-1l, rsp.get(CdcrParams.CHECKPOINT));
 
-    index(SOURCE_COLLECTION, getDoc(id, "a")); // shard 2
+    index(SOURCE_COLLECTION, getDoc(id, "a","test_i_dvo",10)); // shard 2
 
     // only one document indexed in shard 2, the checkpoint must be still -1
     rsp = invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.COLLECTIONCHECKPOINT);
@@ -97,17 +100,39 @@ public class CdcrRequestHandlerTest extends BaseCdcrDistributedZkTest {
     expected = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
     assertEquals(expected, checkpoint2);
 
+    // send a delete by id
+    long pre_op = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD2), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
+    deleteById(SOURCE_COLLECTION, Arrays.asList(new String[]{"c"})); //shard1
+    // document deleted in shard1, checkpoint should come from shard2
+    rsp = invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD2), CdcrParams.CdcrAction.COLLECTIONCHECKPOINT);
+    long checkpoint3 = (Long) rsp.get(CdcrParams.CHECKPOINT);
+    expected = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD2), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
+    assertEquals(pre_op, expected);
+    assertEquals(expected, checkpoint3);
+
+    // send an in-place update
+    SolrInputDocument in_place_doc = new SolrInputDocument();
+    in_place_doc.setField(id, "a");
+    in_place_doc.setField("test_i_dvo", ImmutableMap.of("inc", 10)); //shard2
+    index(SOURCE_COLLECTION, in_place_doc);
+    // document updated in shard2, checkpoint should come from shard1
+    rsp = invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.COLLECTIONCHECKPOINT);
+    long checkpoint4 = (Long) rsp.get(CdcrParams.CHECKPOINT);
+    expected = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
+    assertEquals(expected, checkpoint4);
+
     // send a delete by query
     deleteByQuery(SOURCE_COLLECTION, "*:*");
 
     // all the checkpoints must come from the DBQ
     rsp = invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD2), CdcrParams.CdcrAction.COLLECTIONCHECKPOINT);
-    long checkpoint3 = (Long) rsp.get(CdcrParams.CHECKPOINT);
-    assertTrue(checkpoint3 > 0); // ensure that checkpoints from deletes are in absolute form
-    checkpoint3 = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
-    assertTrue(checkpoint3 > 0); // ensure that checkpoints from deletes are in absolute form
-    checkpoint3 = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD2), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
-    assertTrue(checkpoint3 > 0); // ensure that checkpoints from deletes are in absolute form
+    long checkpoint5= (Long) rsp.get(CdcrParams.CHECKPOINT);
+    assertTrue(checkpoint5 > 0); // ensure that checkpoints from deletes are in absolute form
+    checkpoint5 = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
+    assertTrue(checkpoint5 > 0); // ensure that checkpoints from deletes are in absolute form
+    checkpoint5 = (Long) invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD2), CdcrParams.CdcrAction.SHARDCHECKPOINT).get(CdcrParams.CHECKPOINT);
+    assertTrue(checkpoint5 > 0); // ensure that checkpoints from deletes are in absolute form
+
 
     // replication never started, lastProcessedVersion should be -1 for both shards
     rsp = invokeCdcrAction(shardToLeaderJetty.get(SOURCE_COLLECTION).get(SHARD1), CdcrParams.CdcrAction.LASTPROCESSEDVERSION);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/033afbfb/solr/core/src/test/org/apache/solr/search/TestRecovery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestRecovery.java b/solr/core/src/test/org/apache/solr/search/TestRecovery.java
index b6ec6b1..4b00ba2 100644
--- a/solr/core/src/test/org/apache/solr/search/TestRecovery.java
+++ b/solr/core/src/test/org/apache/solr/search/TestRecovery.java
@@ -75,6 +75,7 @@ public class TestRecovery extends SolrTestCaseJ4 {
   public static void beforeClass() throws Exception {
     savedFactory = System.getProperty("solr.DirectoryFactory");
     System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockFSDirectoryFactory");
+    randomizeUpdateLogImpl();
     initCore("solrconfig-tlog.xml","schema15.xml");
     
     // validate that the schema was not changed to an unexpected state

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/033afbfb/solr/core/src/test/org/apache/solr/search/TestStressRecovery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestStressRecovery.java b/solr/core/src/test/org/apache/solr/search/TestStressRecovery.java
index 933700d..aaac791 100644
--- a/solr/core/src/test/org/apache/solr/search/TestStressRecovery.java
+++ b/solr/core/src/test/org/apache/solr/search/TestStressRecovery.java
@@ -47,6 +47,7 @@ public class TestStressRecovery extends TestRTGBase {
 
   @BeforeClass
   public static void beforeClass() throws Exception {
+    randomizeUpdateLogImpl();
     initCore("solrconfig-tlog.xml","schema15.xml");
   }