You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/23 08:19:45 UTC
[1/2] lucene-solr:jira/solr-11702: SOLR-11702: Renaming
Repository: lucene-solr
Updated Branches:
refs/heads/jira/solr-11702 c6e1c40e8 -> bea9f6501
SOLR-11702: Renaming
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7b055f4a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7b055f4a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7b055f4a
Branch: refs/heads/jira/solr-11702
Commit: 7b055f4afc201cf911ca7a7562988b950b1da0fc
Parents: c6e1c40
Author: Cao Manh Dat <da...@apache.org>
Authored: Tue Jan 23 15:18:25 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Tue Jan 23 15:18:25 2018 +0700
----------------------------------------------------------------------
.../org/apache/solr/cloud/ZkShardTerms.java | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7b055f4a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
index 3dc6b41..d776bf4 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
@@ -95,11 +95,11 @@ public class ZkShardTerms implements AutoCloseable{
/**
* Ensure that leader's term is higher than some replica's terms
* @param leader coreNodeName of leader
- * @param replicasInLowerTerms set of replicas in which their terms should be lower than leader's term
+ * @param replicasNeedingRecovery set of replicas whose terms should be lower than the leader's term
*/
- public void ensureTermsIsHigher(String leader, Set<String> replicasInLowerTerms) {
+ public void ensureTermsIsHigher(String leader, Set<String> replicasNeedingRecovery) {
Terms newTerms;
- while( (newTerms = terms.increaseTerms(leader, replicasInLowerTerms)) != null) {
+ while( (newTerms = terms.increaseTerms(leader, replicasNeedingRecovery)) != null) {
if (forceSaveTerms(newTerms)) return;
}
}
@@ -368,12 +368,12 @@ public class ZkShardTerms implements AutoCloseable{
}
/**
- * Return a new {@link Terms} in which term of {@code leader} is higher than {@code replicasInLowerTerms}
+ * Return a new {@link Terms} in which term of {@code leader} is higher than {@code replicasNeedingRecovery}
* @param leader coreNodeName of leader
- * @param replicasInLowerTerms set of replicas in which their terms should be lower than leader's term
- * @return null if term of {@code leader} is already higher than {@code replicasInLowerTerms}
+ * @param replicasNeedingRecovery set of replicas whose terms should be lower than the leader's term
+ * @return null if term of {@code leader} is already higher than {@code replicasNeedingRecovery}
*/
- Terms increaseTerms(String leader, Set<String> replicasInLowerTerms) {
+ Terms increaseTerms(String leader, Set<String> replicasNeedingRecovery) {
if (!values.containsKey(leader)) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can not find leader's term " + leader);
}
@@ -384,9 +384,9 @@ public class ZkShardTerms implements AutoCloseable{
HashMap<String, Long> newValues = new HashMap<>(values);
long leaderTerm = newValues.get(leader);
for (String replica : newValues.keySet()) {
- if (replicasInLowerTerms.contains(replica)) foundReplicasInLowerTerms = true;
+ if (replicasNeedingRecovery.contains(replica)) foundReplicasInLowerTerms = true;
if (Objects.equals(newValues.get(replica), leaderTerm)) {
- if(replicasInLowerTerms.contains(replica)) {
+ if(replicasNeedingRecovery.contains(replica)) {
changed = true;
} else {
newValues.put(replica, leaderTerm+1);
@@ -394,7 +394,7 @@ public class ZkShardTerms implements AutoCloseable{
}
}
- // We should skip the optimization if there are no replicasInLowerTerms present in local terms,
+ // We should skip the optimization if there are no replicasNeedingRecovery present in local terms,
// this may indicate that the current value is stale
if (!changed && foundReplicasInLowerTerms) return null;
return new Terms(newValues, version);
[2/2] lucene-solr:jira/solr-11702: SOLR-11702: Adding testing for
doing recovery on restart
Posted by da...@apache.org.
SOLR-11702: Adding testing for doing recovery on restart
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bea9f650
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bea9f650
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bea9f650
Branch: refs/heads/jira/solr-11702
Commit: bea9f6501366d604328f22de375011af4a45d3d9
Parents: 7b055f4
Author: Cao Manh Dat <da...@apache.org>
Authored: Tue Jan 23 15:19:16 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Tue Jan 23 15:19:16 2018 +0700
----------------------------------------------------------------------
.../apache/solr/cloud/HttpPartitionTest.java | 73 +++++++++++++++++---
1 file changed, 65 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bea9f650/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java b/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
index 401cb9b..8837ed4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
@@ -62,6 +62,9 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.solr.common.cloud.Replica.State.DOWN;
+import static org.apache.solr.common.cloud.Replica.State.RECOVERING;
+
/**
* Simulates HTTP partitions between a leader and replica but the replica does
* not lose its ZooKeeper connection.
@@ -125,6 +128,8 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
testLeaderInitiatedRecoveryCRUD();
+ testDoRecoveryOnRestart();
+
// Tests that if we set a minRf that's not satisfied, no recovery is requested, but if minRf is satisfied,
// recovery is requested
testMinRf();
@@ -188,10 +193,10 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
}
};
- zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), Replica.State.DOWN, cd, true);
+ zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), DOWN, cd, true);
Map<String,Object> lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
assertNotNull(lirStateMap);
- assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
+ assertSame(DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
// test old non-json format handling
SolrZkClient zkClient = zkController.getZkClient();
@@ -199,13 +204,65 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
zkClient.setData(znodePath, "down".getBytes(StandardCharsets.UTF_8), true);
lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
assertNotNull(lirStateMap);
- assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
+ assertSame(DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
zkClient.delete(znodePath, -1, false);
// try to clean up
attemptCollectionDelete(cloudClient, testCollectionName);
}
+ private void testDoRecoveryOnRestart() throws Exception {
+ String testCollectionName = "collDoRecoveryOnRestart";
+ try {
+ // Inject a pause into the recovery op so that the replica won't be able to finish recovery
+ System.setProperty("solr.cloud.wait-for-updates-with-stale-state-pause", String.valueOf(Integer.MAX_VALUE));
+
+ createCollection(testCollectionName, "conf1", 1, 2, 1);
+ cloudClient.setDefaultCollection(testCollectionName);
+
+ sendDoc(1, 2);
+
+ JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(getShardLeader(testCollectionName, "shard1", 1000)));
+ List<Replica> notLeaders =
+ ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
+ assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
+
+ SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
+ SocketProxy leaderProxy = getProxyForReplica(getShardLeader(testCollectionName, "shard1", 1000));
+
+ proxy0.close();
+ leaderProxy.close();
+
+ // indexing during a partition
+ int achievedRf = sendDoc(2, 1, leaderJetty);
+ assertEquals("Unexpected achieved replication factor", 1, achievedRf);
+ try (ZkShardTerms zkShardTerms = new ZkShardTerms(testCollectionName, "shard1", cloudClient.getZkStateReader().getZkClient())) {
+ assertFalse(zkShardTerms.canBecomeLeader(notLeaders.get(0).getName()));
+ }
+ waitForState(testCollectionName, notLeaders.get(0).getName(), DOWN, 10000);
+
+ // heal partition
+ proxy0.reopen();
+ leaderProxy.reopen();
+
+ waitForState(testCollectionName, notLeaders.get(0).getName(), RECOVERING, 10000);
+
+ System.clearProperty("solr.cloud.wait-for-updates-with-stale-state-pause");
+ JettySolrRunner notLeaderJetty = getJettyOnPort(getReplicaPort(notLeaders.get(0)));
+ notLeaderJetty.stop();
+ // DOWNNODE will bring the replica into the DOWN state
+ waitForState(testCollectionName, notLeaders.get(0).getName(), DOWN, 10000);
+ notLeaderJetty.start();
+ ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, 100);
+ assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
+ } finally {
+ System.clearProperty("solr.cloud.wait-for-updates-with-stale-state-pause");
+ }
+
+ // try to clean up
+ attemptCollectionDelete(cloudClient, testCollectionName);
+ }
+
protected void testMinRf() throws Exception {
// create a collection that has 1 shard and 3 replicas
String testCollectionName = "collMinRf_1x3";
@@ -314,7 +371,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
// indexing during a partition
sendDoc(2, null, leaderJetty);
// replica should publish itself as DOWN if the network is not healed after some amount time
- waitForDownState(testCollectionName, notLeader.getName(), 10000);
+ waitForState(testCollectionName, notLeader.getName(), DOWN, 10000);
proxy.reopen();
leaderProxy.reopen();
@@ -394,7 +451,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
attemptCollectionDelete(cloudClient, testCollectionName);
}
- private void waitForDownState(String collection, String replicaName, long ms) throws KeeperException, InterruptedException {
+ private void waitForState(String collection, String replicaName, Replica.State state, long ms) throws KeeperException, InterruptedException {
TimeOut timeOut = new TimeOut(ms, TimeUnit.MILLISECONDS, TimeSource.CURRENT_TIME);
Replica.State replicaState = Replica.State.ACTIVE;
while (!timeOut.hasTimedOut()) {
@@ -405,10 +462,10 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
Slice slice = slices.iterator().next();
Replica partitionedReplica = slice.getReplica(replicaName);
replicaState = partitionedReplica.getState();
- if (replicaState == Replica.State.DOWN) return;
+ if (replicaState == state) return;
}
- assertEquals("The partitioned replica did not published it self as down",
- Replica.State.DOWN, replicaState);
+ assertEquals("Timeout waiting for state "+ state +" of replica " + replicaName + ", current state " + replicaState,
+ state, replicaState);
}
protected void testRf3() throws Exception {