You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2019/12/18 18:36:19 UTC

[lucene-solr] branch jira/solr14089 updated: Fixing SYNCSHARD and corresponding test, and also partly fix RecoveryStrategy

This is an automated email from the ASF dual-hosted git repository.

ishan pushed a commit to branch jira/solr14089
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/solr14089 by this push:
     new edce278  Fixing SYNCSHARD and corresponding test, and also partly fix RecoveryStrategy
edce278 is described below

commit edce2785996b1258e72ae8e8cfab0a3f67a3720d
Author: Ishan Chattopadhyaya <is...@apache.org>
AuthorDate: Thu Dec 19 00:06:06 2019 +0530

    Fixing SYNCSHARD and corresponding test, and also partly fix RecoveryStrategy
---
 solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java   | 8 +++++---
 solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java       | 6 +++++-
 .../java/org/apache/solr/handler/admin/CollectionsHandler.java   | 9 +++------
 solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java      | 1 +
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index 1148fa9..e25acd9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -377,7 +377,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
           return;
         }
 
-        log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
+        log.info("Publishing1 state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
             ourUrl);
         zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
 
@@ -582,11 +582,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
         // recalling buffer updates will drop the old buffer tlog
         ulog.bufferUpdates();
 
-        log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(),
+        log.info("Publishing2 state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(),
             leader.getCoreUrl(),
             ourUrl);
         zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
-
+        zkController.getShardTerms(this.coreDescriptor.getCollectionName(), this.coreDescriptor.getCloudDescriptor().getShardId()).startRecovering(coreDescriptor.getCloudDescriptor().getCoreNodeName(), true);
+        
         final Slice slice = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
             .getSlice(cloudDesc.getShardId());
 
@@ -694,6 +695,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
               zkController.startReplicationFromLeader(coreName, true);
             }
             zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
+            zkController.getShardTerms(this.coreDescriptor.getCollectionName(), this.coreDescriptor.getCloudDescriptor().getShardId()).doneRecovering(coreDescriptor.getCloudDescriptor().getCoreNodeName());
           } catch (Exception e) {
             log.error("Could not publish as ACTIVE after succesful recovery", e);
             successfulRecovery = false;
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
index 94af222..1a7d75c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
@@ -224,8 +224,12 @@ public class ZkShardTerms implements AutoCloseable{
    * Mark {@code coreNodeName} as recovering
    */
   public void startRecovering(String coreNodeName) {
+    startRecovering(coreNodeName, false);
+  }
+
+  public void startRecovering(String coreNodeName, boolean forceMarkRecovery) {    
     ShardTerms newTerms;
-    while ( (newTerms = terms.startRecovering(coreNodeName)) != null) {
+    while ( (newTerms = terms.startRecovering(coreNodeName, forceMarkRecovery)) != null) {
       if (forceSaveTerms(newTerms)) break;
     }
   }
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index bb1f725..0ec8982 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -72,7 +72,6 @@ import org.apache.solr.common.cloud.Replica.State;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkCmdExecutor;
-import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.AutoScalingParams;
@@ -601,18 +600,16 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
 
       ClusterState clusterState = h.coreContainer.getZkController().getClusterState();
 
-      DocCollection docCollection = clusterState.getCollection(collection);
-      ZkNodeProps leaderProps = docCollection.getLeader(shard);
-      ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
+      Replica leader = h.coreContainer.getZkController().getZkStateReader().getShardStateProvider(collection).getLeader(clusterState.getCollection(collection).getSlice(shard));
 
-      try (HttpSolrClient client = new Builder(nodeProps.getBaseUrl())
+      try (HttpSolrClient client = new Builder(leader.getBaseUrl())
           .withConnectionTimeout(15000)
           .withSocketTimeout(60000)
           .build()) {
         RequestSyncShard reqSyncShard = new RequestSyncShard();
         reqSyncShard.setCollection(collection);
         reqSyncShard.setShard(shard);
-        reqSyncShard.setCoreName(nodeProps.getCoreName());
+        reqSyncShard.setCoreName(leader.getCoreName());
         client.request(reqSyncShard);
       }
       return null;
diff --git a/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java b/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
index 7f78516..af77b4e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
@@ -96,6 +96,7 @@ public class SyncSliceTest extends AbstractFullDistribZkTestBase {
 
     // shard should be inconsistent
     String shardFailMessage = checkShardConsistency("shard1", true, false);
+    System.out.println("Good exception: "+shardFailMessage); // nocommit
     assertNotNull(shardFailMessage);
     
     ModifiableSolrParams params = new ModifiableSolrParams();