You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/10/01 08:39:01 UTC
[1/2] lucene-solr:jira/http2: SOLR-12774: IndexOutOfBoundsException on logging startupVersions
Repository: lucene-solr
Updated Branches:
refs/heads/jira/http2 183aa66ab -> 3c1d7ff4e
SOLR-12774: IndexOutOfBoundsException on logging startupVersions
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/93e2ae3f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/93e2ae3f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/93e2ae3f
Branch: refs/heads/jira/http2
Commit: 93e2ae3fc454f1f376a7b4389905ebeb1732af70
Parents: 3ece536
Author: Cao Manh Dat <da...@apache.org>
Authored: Mon Oct 1 15:25:30 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Mon Oct 1 15:25:30 2018 +0700
----------------------------------------------------------------------
.../org/apache/solr/cloud/RecoveryStrategy.java | 206 ++++++++++---------
1 file changed, 105 insertions(+), 101 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93e2ae3f/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index aa87801..94e126e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -321,135 +321,135 @@ public class RecoveryStrategy implements Runnable, Closeable {
// + core.getUpdateHandler().getUpdateLog());
// return;
// }
- while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
- try {
- CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
- ZkNodeProps leaderprops = zkStateReader.getLeaderRetry(
- cloudDesc.getCollectionName(), cloudDesc.getShardId());
- final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP);
- final String leaderCoreName = leaderprops.getStr(ZkStateReader.CORE_NAME_PROP);
+ while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
+ try {
+ CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
+ ZkNodeProps leaderprops = zkStateReader.getLeaderRetry(
+ cloudDesc.getCollectionName(), cloudDesc.getShardId());
+ final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP);
+ final String leaderCoreName = leaderprops.getStr(ZkStateReader.CORE_NAME_PROP);
- String leaderUrl = ZkCoreNodeProps.getCoreUrl(leaderBaseUrl, leaderCoreName);
+ String leaderUrl = ZkCoreNodeProps.getCoreUrl(leaderBaseUrl, leaderCoreName);
- String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
+ String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
- boolean isLeader = leaderUrl.equals(ourUrl); //TODO: We can probably delete most of this code if we say this strategy can only be used for pull replicas
- if (isLeader && !cloudDesc.isLeader()) {
- throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
- }
- if (cloudDesc.isLeader()) {
- assert cloudDesc.getReplicaType() != Replica.Type.PULL;
- // we are now the leader - no one else must have been suitable
- log.warn("We have not yet recovered - but we are now the leader!");
- log.info("Finished recovery process.");
- zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
- return;
- }
-
-
- log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
- ourUrl);
- zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING);
-
- if (isClosed()) {
- log.info("Recovery for core {} has been closed", core.getName());
- break;
- }
- log.info("Starting Replication Recovery.");
+ boolean isLeader = leaderUrl.equals(ourUrl); //TODO: We can probably delete most of this code if we say this strategy can only be used for pull replicas
+ if (isLeader && !cloudDesc.isLeader()) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
+ }
+ if (cloudDesc.isLeader()) {
+ assert cloudDesc.getReplicaType() != Replica.Type.PULL;
+ // we are now the leader - no one else must have been suitable
+ log.warn("We have not yet recovered - but we are now the leader!");
+ log.info("Finished recovery process.");
+ zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
+ return;
+ }
- try {
- log.info("Stopping background replicate from leader process");
- zkController.stopReplicationFromLeader(coreName);
- replicate(zkController.getNodeName(), core, leaderprops);
+
+ log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
+ ourUrl);
+ zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING);
if (isClosed()) {
log.info("Recovery for core {} has been closed", core.getName());
break;
}
+ log.info("Starting Replication Recovery.");
- log.info("Replication Recovery was successful.");
- successfulRecovery = true;
- } catch (Exception e) {
- SolrException.log(log, "Error while trying to recover", e);
- }
-
- } catch (Exception e) {
- SolrException.log(log, "Error while trying to recover. core=" + coreName, e);
- } finally {
- if (successfulRecovery) {
- log.info("Restaring background replicate from leader process");
- zkController.startReplicationFromLeader(coreName, false);
- log.info("Registering as Active after recovery.");
try {
- zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
- } catch (Exception e) {
- log.error("Could not publish as ACTIVE after succesful recovery", e);
- successfulRecovery = false;
- }
-
- if (successfulRecovery) {
- close = true;
- recoveryListener.recovered();
- }
- }
- }
+ log.info("Stopping background replicate from leader process");
+ zkController.stopReplicationFromLeader(coreName);
+ replicate(zkController.getNodeName(), core, leaderprops);
- if (!successfulRecovery) {
- // lets pause for a moment and we need to try again...
- // TODO: we don't want to retry for some problems?
- // Or do a fall off retry...
- try {
+ if (isClosed()) {
+ log.info("Recovery for core {} has been closed", core.getName());
+ break;
+ }
- if (isClosed()) {
- log.info("Recovery for core {} has been closed", core.getName());
- break;
+ log.info("Replication Recovery was successful.");
+ successfulRecovery = true;
+ } catch (Exception e) {
+ SolrException.log(log, "Error while trying to recover", e);
}
-
- log.error("Recovery failed - trying again... (" + retries + ")");
-
- retries++;
- if (retries >= maxRetries) {
- SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
+
+ } catch (Exception e) {
+ SolrException.log(log, "Error while trying to recover. core=" + coreName, e);
+ } finally {
+ if (successfulRecovery) {
+ log.info("Restaring background replicate from leader process");
+ zkController.startReplicationFromLeader(coreName, false);
+ log.info("Registering as Active after recovery.");
try {
- recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
+ zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
} catch (Exception e) {
- SolrException.log(log, "Could not publish that recovery failed", e);
+ log.error("Could not publish as ACTIVE after succesful recovery", e);
+ successfulRecovery = false;
+ }
+
+ if (successfulRecovery) {
+ close = true;
+ recoveryListener.recovered();
}
- break;
}
- } catch (Exception e) {
- SolrException.log(log, "An error has occurred during recovery", e);
}
- try {
- // Wait an exponential interval between retries, start at 5 seconds and work up to a minute.
- // If we're at attempt >= 4, there's no point computing pow(2, retries) because the result
- // will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in
- // order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m).
- int loopCount = retries < 4 ? (int) Math.min(Math.pow(2, retries), 12) : 12;
- log.info("Wait [{}] seconds before trying to recover again (attempt={})",
- TimeUnit.MILLISECONDS.toSeconds(loopCount * startingRecoveryDelayMilliSeconds), retries);
- for (int i = 0; i < loopCount; i++) {
+ if (!successfulRecovery) {
+ // lets pause for a moment and we need to try again...
+ // TODO: we don't want to retry for some problems?
+ // Or do a fall off retry...
+ try {
+
if (isClosed()) {
log.info("Recovery for core {} has been closed", core.getName());
- break; // check if someone closed us
+ break;
+ }
+
+ log.error("Recovery failed - trying again... (" + retries + ")");
+
+ retries++;
+ if (retries >= maxRetries) {
+ SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
+ try {
+ recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
+ } catch (Exception e) {
+ SolrException.log(log, "Could not publish that recovery failed", e);
+ }
+ break;
}
- Thread.sleep(startingRecoveryDelayMilliSeconds);
+ } catch (Exception e) {
+ SolrException.log(log, "An error has occurred during recovery", e);
+ }
+
+ try {
+ // Wait an exponential interval between retries, start at 5 seconds and work up to a minute.
+ // If we're at attempt >= 4, there's no point computing pow(2, retries) because the result
+ // will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in
+ // order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m).
+ int loopCount = retries < 4 ? (int) Math.min(Math.pow(2, retries), 12) : 12;
+ log.info("Wait [{}] seconds before trying to recover again (attempt={})",
+ TimeUnit.MILLISECONDS.toSeconds(loopCount * startingRecoveryDelayMilliSeconds), retries);
+ for (int i = 0; i < loopCount; i++) {
+ if (isClosed()) {
+ log.info("Recovery for core {} has been closed", core.getName());
+ break; // check if someone closed us
+ }
+ Thread.sleep(startingRecoveryDelayMilliSeconds);
+ }
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ log.warn("Recovery was interrupted.", e);
+ close = true;
}
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- log.warn("Recovery was interrupted.", e);
- close = true;
}
- }
+ }
+ // We skip core.seedVersionBuckets(); We don't have a transaction log
+ log.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery));
}
- // We skip core.seedVersionBuckets(); We don't have a transaction log
- log.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery));
-}
// TODO: perhaps make this grab a new core each time through the loop to handle core reloads?
- final public void doSyncOrReplicateRecovery(SolrCore core) throws Exception {
+ public final void doSyncOrReplicateRecovery(SolrCore core) throws Exception {
boolean successfulRecovery = false;
UpdateLog ulog;
@@ -488,7 +488,11 @@ public class RecoveryStrategy implements Runnable, Closeable {
log.info("currentVersions size={} range=[{} to {}]", recentVersions.size(), recentVersions.get(0), recentVersions.get(recentVersions.size()-1));
}
- log.info("startupVersions size={} range=[{} to {}]", startingVersions.size(), startingVersions.get(0), startingVersions.get(startingVersions.size()-1));
+ if (startingVersions.isEmpty()) {
+ log.info("startupVersions is empty");
+ } else {
+ log.info("startupVersions size={} range=[{} to {}]", startingVersions.size(), startingVersions.get(0), startingVersions.get(startingVersions.size()-1));
+ }
} catch (Exception e) {
SolrException.log(log, "Error getting recent versions.", e);
recentVersions = new ArrayList<>(0);
[2/2] lucene-solr:jira/http2: Merge branch 'master' into jira/http2
Posted by da...@apache.org.
Merge branch 'master' into jira/http2
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3c1d7ff4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3c1d7ff4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3c1d7ff4
Branch: refs/heads/jira/http2
Commit: 3c1d7ff4e15d5aa190555b8924865bfb1f1cbb54
Parents: 183aa66 93e2ae3
Author: Cao Manh Dat <da...@apache.org>
Authored: Mon Oct 1 15:30:43 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Mon Oct 1 15:30:43 2018 +0700
----------------------------------------------------------------------
.../org/apache/solr/cloud/RecoveryStrategy.java | 206 ++++++++++---------
1 file changed, 105 insertions(+), 101 deletions(-)
----------------------------------------------------------------------