You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2019/10/30 20:45:16 UTC
[lucene-solr] 11/11: @W-6782433 Shared replica skip recovery (#409)
This is an automated email from the ASF dual-hosted git repository.
yonik pushed a commit to branch jira/SOLR-13101
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 7c9063b6c5c05ecf196969ed6f88314e2ffe92a8
Author: Megan Carey <me...@salesforce.com>
AuthorDate: Tue Oct 29 15:09:23 2019 -0700
@W-6782433 Shared replica skip recovery (#409)
* Adding back joda-time dependency for S3
* Skip sync and recovery for SHARED replicas prior to leader election
---
.../org/apache/solr/cloud/ElectionContext.java | 157 +++++++++++----------
1 file changed, 81 insertions(+), 76 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index ba2f3a5..d179ca0 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -362,92 +362,97 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType();
coreNodeName = core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName();
- // should I be leader?
- ZkShardTerms zkShardTerms = zkController.getShardTerms(collection, shardId);
- if (zkShardTerms.registered(coreNodeName) && !zkShardTerms.canBecomeLeader(coreNodeName)) {
- if (!waitForEligibleBecomeLeaderAfterTimeout(zkShardTerms, coreNodeName, leaderVoteWait)) {
- rejoinLeaderElection(core);
- return;
- } else {
- // only log an error if this replica win the election
- setTermToMax = true;
- }
- }
-
- if (isClosed) {
- return;
- }
-
- log.info("I may be the new leader - try and sync");
-
- // we are going to attempt to be the leader
- // first cancel any current recovery
- core.getUpdateHandler().getSolrCoreState().cancelRecovery();
-
- if (weAreReplacement) {
- // wait a moment for any floating updates to finish
- try {
- Thread.sleep(2500);
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, e);
- }
- }
-
- PeerSync.PeerSyncResult result = null;
- boolean success = false;
- try {
- result = syncStrategy.sync(zkController, core, leaderProps, weAreReplacement);
- success = result.isSuccess();
- } catch (Exception e) {
- SolrException.log(log, "Exception while trying to sync", e);
- result = PeerSync.PeerSyncResult.failure();
- }
-
- UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
- if (!success) {
- boolean hasRecentUpdates = false;
- if (ulog != null) {
- // TODO: we could optimize this if necessary
- try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
- hasRecentUpdates = !recentUpdates.getVersions(1).isEmpty();
+ // If this is a SHARED replica, skip the sync and recovery stages. A SHARED replica that is not
+ // up-to-date can still become leader; it will sync the latest from blobstore on the next request.
+ if (replicaType != Replica.Type.SHARED) {
+ // should I be leader?
+ ZkShardTerms zkShardTerms = zkController.getShardTerms(collection, shardId);
+ if (zkShardTerms.registered(coreNodeName) && !zkShardTerms.canBecomeLeader(coreNodeName)) {
+ if (!waitForEligibleBecomeLeaderAfterTimeout(zkShardTerms, coreNodeName, leaderVoteWait)) {
+ rejoinLeaderElection(core);
+ return;
+ } else {
+ // only log an error if this replica wins the election
+ setTermToMax = true;
}
}
+
+ if (isClosed) {
+ return;
+ }
- if (!hasRecentUpdates) {
- // we failed sync, but we have no versions - we can't sync in that case
- // - we were active
- // before, so become leader anyway if no one else has any versions either
- if (result.getOtherHasVersions().orElse(false)) {
- log.info("We failed sync, but we have no versions - we can't sync in that case. But others have some versions, so we should not become leader");
- success = false;
- } else {
- log.info(
- "We failed sync, but we have no versions - we can't sync in that case - we were active before, so become leader anyway");
- success = true;
+ log.info("I may be the new leader - try and sync");
+
+ // we are going to attempt to be the leader
+ // first cancel any current recovery
+ core.getUpdateHandler().getSolrCoreState().cancelRecovery();
+
+ if (weAreReplacement) {
+ // wait a moment for any floating updates to finish
+ try {
+ Thread.sleep(2500);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, e);
}
}
- }
-
- // solrcloud_debug
- if (log.isDebugEnabled()) {
+
+ PeerSync.PeerSyncResult result = null;
+ boolean success = false;
try {
- RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
- SolrIndexSearcher searcher = searchHolder.get();
+ result = syncStrategy.sync(zkController, core, leaderProps, weAreReplacement);
+ success = result.isSuccess();
+ } catch (Exception e) {
+ SolrException.log(log, "Exception while trying to sync", e);
+ result = PeerSync.PeerSyncResult.failure();
+ }
+
+ UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
+
+ if (!success) {
+ boolean hasRecentUpdates = false;
+ if (ulog != null) {
+ // TODO: we could optimize this if necessary
+ try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
+ hasRecentUpdates = !recentUpdates.getVersions(1).isEmpty();
+ }
+ }
+
+ if (!hasRecentUpdates) {
+ // we failed sync, but we have no versions - we can't sync in that case
+ // - we were active
+ // before, so become leader anyway if no one else has any versions either
+ if (result.getOtherHasVersions().orElse(false)) {
+ log.info("We failed sync, but we have no versions - we can't sync in that case. But others have some versions, so we should not become leader");
+ success = false;
+ } else {
+ log.info(
+ "We failed sync, but we have no versions - we can't sync in that case - we were active before, so become leader anyway");
+ success = true;
+ }
+ }
+ }
+
+ // solrcloud_debug
+ if (log.isDebugEnabled()) {
try {
- log.debug(core.getCoreContainer().getZkController().getNodeName() + " synched "
- + searcher.count(new MatchAllDocsQuery()));
- } finally {
- searchHolder.decref();
+ RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
+ SolrIndexSearcher searcher = searchHolder.get();
+ try {
+ log.debug(core.getCoreContainer().getZkController().getNodeName() + " synched "
+ + searcher.count(new MatchAllDocsQuery()));
+ } finally {
+ searchHolder.decref();
+ }
+ } catch (Exception e) {
+ log.error("Error in solrcloud_debug block", e);
}
- } catch (Exception e) {
- log.error("Error in solrcloud_debug block", e);
}
- }
- if (!success) {
- rejoinLeaderElection(core);
- return;
+ if (!success) {
+ rejoinLeaderElection(core);
+ return;
+ }
}
}