You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2013/12/30 04:27:14 UTC
svn commit: r1554131 - in /lucene/dev/branches/lucene_solr_4_6: ./ solr/
solr/CHANGES.txt solr/core/
solr/core/src/java/org/apache/solr/update/PeerSync.java
solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
Author: markrmiller
Date: Mon Dec 30 03:27:13 2013
New Revision: 1554131
URL: http://svn.apache.org/r1554131
Log:
SOLR-5588: PeerSync doesn't count all connect failures as success.
Modified:
lucene/dev/branches/lucene_solr_4_6/ (props changed)
lucene/dev/branches/lucene_solr_4_6/solr/ (props changed)
lucene/dev/branches/lucene_solr_4_6/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene_solr_4_6/solr/core/ (props changed)
lucene/dev/branches/lucene_solr_4_6/solr/core/src/java/org/apache/solr/update/PeerSync.java
lucene/dev/branches/lucene_solr_4_6/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
Modified: lucene/dev/branches/lucene_solr_4_6/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/solr/CHANGES.txt?rev=1554131&r1=1554130&r2=1554131&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_6/solr/CHANGES.txt Mon Dec 30 03:27:13 2013
@@ -90,7 +90,10 @@ Bug Fixes
* SOLR-5503: Retry 'forward to leader' requests less aggressively - rather
than on IOException and status 500, ConnectException. (Mark Miller)
-
+
+* SOLR-5588: PeerSync doesn't count all connect failures as success.
+ (Mark Miller)
+
Optimizations
----------------------
Modified: lucene/dev/branches/lucene_solr_4_6/solr/core/src/java/org/apache/solr/update/PeerSync.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/solr/core/src/java/org/apache/solr/update/PeerSync.java?rev=1554131&r1=1554130&r2=1554131&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/solr/core/src/java/org/apache/solr/update/PeerSync.java (original)
+++ lucene/dev/branches/lucene_solr_4_6/solr/core/src/java/org/apache/solr/update/PeerSync.java Mon Dec 30 03:27:13 2013
@@ -291,7 +291,8 @@ public class PeerSync {
if (cantReachIsSuccess && sreq.purpose == 1 && srsp.getException() instanceof SolrServerException) {
Throwable solrException = ((SolrServerException) srsp.getException())
.getRootCause();
- if (solrException instanceof ConnectException || solrException instanceof ConnectTimeoutException
+ boolean connectTimeoutExceptionInChain = connectTimeoutExceptionInChain(srsp.getException());
+ if (connectTimeoutExceptionInChain || solrException instanceof ConnectException || solrException instanceof ConnectTimeoutException
|| solrException instanceof NoHttpResponseException || solrException instanceof SocketException) {
log.warn(msg() + " couldn't connect to " + srsp.getShardAddress() + ", counting as success");
@@ -308,6 +309,10 @@ public class PeerSync {
log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success");
return true;
}
+
+ // TODO: we should return the above information so that when we can request a recovery through zookeeper, we do
+ // that for these nodes
+
// TODO: at least log???
// srsp.getException().printStackTrace(System.out);
@@ -323,6 +328,23 @@ public class PeerSync {
}
}
+ // sometimes the root exception is a SocketTimeoutException, but ConnectTimeoutException
+ // is in the chain
+ private boolean connectTimeoutExceptionInChain(Throwable exception) {
+ Throwable t = exception;
+ while (true) {
+ if (t instanceof ConnectTimeoutException) {
+ return true;
+ }
+ Throwable cause = t.getCause();
+ if (cause != null) {
+ t = cause;
+ } else {
+ return false;
+ }
+ }
+ }
+
private boolean handleVersions(ShardResponse srsp) {
// we retrieved the last N updates from the replica
List<Long> otherVersions = (List<Long>)srsp.getSolrResponse().getResponse().get("versions");
Modified: lucene/dev/branches/lucene_solr_4_6/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java?rev=1554131&r1=1554130&r2=1554131&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java (original)
+++ lucene/dev/branches/lucene_solr_4_6/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java Mon Dec 30 03:27:13 2013
@@ -267,14 +267,6 @@ public class BasicDistributedZk2Test ext
// kill a shard
CloudJettyRunner deadShard = chaosMonkey.stopShard(SHARD1, 0);
-
-
- // we are careful to make sure the downed node is no longer in the state,
- // because on some systems (especially freebsd w/ blackhole enabled), trying
- // to talk to a downed node causes grief
- Set<CloudJettyRunner> jetties = new HashSet<CloudJettyRunner>();
- jetties.addAll(shardToJetty.get(SHARD1));
- jetties.remove(deadShard);
// ensure shard is dead
try {