You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/23 08:19:45 UTC

[1/2] lucene-solr:jira/solr-11702: SOLR-11702: Renaming

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-11702 c6e1c40e8 -> bea9f6501


SOLR-11702: Renaming


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7b055f4a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7b055f4a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7b055f4a

Branch: refs/heads/jira/solr-11702
Commit: 7b055f4afc201cf911ca7a7562988b950b1da0fc
Parents: c6e1c40
Author: Cao Manh Dat <da...@apache.org>
Authored: Tue Jan 23 15:18:25 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Tue Jan 23 15:18:25 2018 +0700

----------------------------------------------------------------------
 .../org/apache/solr/cloud/ZkShardTerms.java     | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7b055f4a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
index 3dc6b41..d776bf4 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
@@ -95,11 +95,11 @@ public class ZkShardTerms implements AutoCloseable{
   /**
    * Ensure that leader's term is higher than some replica's terms
    * @param leader coreNodeName of leader
-   * @param replicasInLowerTerms set of replicas in which their terms should be lower than leader's term
+   * @param replicasNeedingRecovery set of replicas whose terms should be lower than leader's term
    */
-  public void ensureTermsIsHigher(String leader, Set<String> replicasInLowerTerms) {
+  public void ensureTermsIsHigher(String leader, Set<String> replicasNeedingRecovery) {
     Terms newTerms;
-    while( (newTerms = terms.increaseTerms(leader, replicasInLowerTerms)) != null) {
+    while( (newTerms = terms.increaseTerms(leader, replicasNeedingRecovery)) != null) { // retry until increaseTerms returns null (no change needed) or forceSaveTerms returns true
       if (forceSaveTerms(newTerms)) return;
     }
   }
@@ -368,12 +368,12 @@ public class ZkShardTerms implements AutoCloseable{
     }
 
     /**
-     * Return a new {@link Terms} in which term of {@code leader} is higher than {@code replicasInLowerTerms}
+     * Return a new {@link Terms} in which term of {@code leader} is higher than {@code replicasNeedingRecovery}
      * @param leader coreNodeName of leader
-     * @param replicasInLowerTerms set of replicas in which their terms should be lower than leader's term
-     * @return null if term of {@code leader} is already higher than {@code replicasInLowerTerms}
+     * @param replicasNeedingRecovery set of replicas whose terms should be lower than leader's term
+     * @return null if term of {@code leader} is already higher than {@code replicasNeedingRecovery}
      */
-    Terms increaseTerms(String leader, Set<String> replicasInLowerTerms) {
+    Terms increaseTerms(String leader, Set<String> replicasNeedingRecovery) {
       if (!values.containsKey(leader)) {
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can not find leader's term " + leader);
       }
@@ -384,9 +384,9 @@ public class ZkShardTerms implements AutoCloseable{
       HashMap<String, Long> newValues = new HashMap<>(values);
       long leaderTerm = newValues.get(leader);
       for (String replica : newValues.keySet()) {
-        if (replicasInLowerTerms.contains(replica)) foundReplicasInLowerTerms = true;
+        if (replicasNeedingRecovery.contains(replica)) foundReplicasInLowerTerms = true;
         if (Objects.equals(newValues.get(replica), leaderTerm)) {
-          if(replicasInLowerTerms.contains(replica)) {
+          if(replicasNeedingRecovery.contains(replica)) {
             changed = true;
           } else {
             newValues.put(replica, leaderTerm+1);
@@ -394,7 +394,7 @@ public class ZkShardTerms implements AutoCloseable{
         }
       }
 
-      // We should skip the optimization if there are no replicasInLowerTerms present in local terms,
+      // We should skip the optimization if there are no replicasNeedingRecovery present in local terms,
       // this may indicate that the current value is stale
       if (!changed && foundReplicasInLowerTerms) return null;
       return new Terms(newValues, version);


[2/2] lucene-solr:jira/solr-11702: SOLR-11702: Adding testing for doing recovery on restart

Posted by da...@apache.org.
SOLR-11702: Adding testing for doing recovery on restart


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bea9f650
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bea9f650
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bea9f650

Branch: refs/heads/jira/solr-11702
Commit: bea9f6501366d604328f22de375011af4a45d3d9
Parents: 7b055f4
Author: Cao Manh Dat <da...@apache.org>
Authored: Tue Jan 23 15:19:16 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Tue Jan 23 15:19:16 2018 +0700

----------------------------------------------------------------------
 .../apache/solr/cloud/HttpPartitionTest.java    | 73 +++++++++++++++++---
 1 file changed, 65 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bea9f650/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java b/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
index 401cb9b..8837ed4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
@@ -62,6 +62,9 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.common.cloud.Replica.State.DOWN;
+import static org.apache.solr.common.cloud.Replica.State.RECOVERING;
+
 /**
  * Simulates HTTP partitions between a leader and replica but the replica does
  * not lose its ZooKeeper connection.
@@ -125,6 +128,8 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
 
     testLeaderInitiatedRecoveryCRUD();
 
+    testDoRecoveryOnRestart();
+
     // Tests that if we set a minRf that's not satisfied, no recovery is requested, but if minRf is satisfied,
     // recovery is requested
     testMinRf();
@@ -188,10 +193,10 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
       }
     };
 
-    zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), Replica.State.DOWN, cd, true);
+    zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), DOWN, cd, true);
     Map<String,Object> lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
     assertNotNull(lirStateMap);
-    assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
+    assertSame(DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
 
     // test old non-json format handling
     SolrZkClient zkClient = zkController.getZkClient();
@@ -199,13 +204,65 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
     zkClient.setData(znodePath, "down".getBytes(StandardCharsets.UTF_8), true);
     lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
     assertNotNull(lirStateMap);
-    assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
+    assertSame(DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
     zkClient.delete(znodePath, -1, false);
 
     // try to clean up
     attemptCollectionDelete(cloudClient, testCollectionName);
   }
 
+  private void testDoRecoveryOnRestart() throws Exception { // checks that a replica whose Jetty is restarted while it is RECOVERING becomes active again and that the docs exist in all replicas
+    String testCollectionName = "collDoRecoveryOnRestart";
+    try {
+      // Inject a pause into the recovery op so that the replica cannot finish recovery
+      System.setProperty("solr.cloud.wait-for-updates-with-stale-state-pause", String.valueOf(Integer.MAX_VALUE));
+
+      createCollection(testCollectionName, "conf1", 1, 2, 1);
+      cloudClient.setDefaultCollection(testCollectionName);
+
+      sendDoc(1, 2);
+
+      JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(getShardLeader(testCollectionName, "shard1", 1000)));
+      List<Replica> notLeaders =
+          ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
+      assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
+
+      SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
+      SocketProxy leaderProxy = getProxyForReplica(getShardLeader(testCollectionName, "shard1", 1000));
+
+      proxy0.close(); // closing both proxies creates the partition that is healed further down
+      leaderProxy.close();
+
+      // index a document while the partition is in place
+      int achievedRf = sendDoc(2, 1, leaderJetty);
+      assertEquals("Unexpected achieved replication factor", 1, achievedRf);
+      try (ZkShardTerms zkShardTerms = new ZkShardTerms(testCollectionName, "shard1", cloudClient.getZkStateReader().getZkClient())) {
+        assertFalse(zkShardTerms.canBecomeLeader(notLeaders.get(0).getName())); // the partitioned replica must not be eligible to become leader
+      }
+      waitForState(testCollectionName, notLeaders.get(0).getName(), DOWN, 10000); // wait up to 10000 ms for the replica to reach DOWN
+
+      // heal the partition
+      proxy0.reopen();
+      leaderProxy.reopen();
+
+      waitForState(testCollectionName, notLeaders.get(0).getName(), RECOVERING, 10000); // wait up to 10000 ms for the replica to reach RECOVERING
+
+      System.clearProperty("solr.cloud.wait-for-updates-with-stale-state-pause"); // remove the injected pause before the replica is restarted
+      JettySolrRunner notLeaderJetty = getJettyOnPort(getReplicaPort(notLeaders.get(0)));
+      notLeaderJetty.stop();
+      // DOWNNODE will bring the replica into the DOWN state
+      waitForState(testCollectionName, notLeaders.get(0).getName(), DOWN, 10000);
+      notLeaderJetty.start();
+      ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, 100);
+      assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
+    } finally {
+      System.clearProperty("solr.cloud.wait-for-updates-with-stale-state-pause"); // also clears the pause when the test fails before the clear above
+    }
+
+    // try to clean up
+    attemptCollectionDelete(cloudClient, testCollectionName);
+  }
+
   protected void testMinRf() throws Exception {
     // create a collection that has 1 shard and 3 replicas
     String testCollectionName = "collMinRf_1x3";
@@ -314,7 +371,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
     // indexing during a partition
     sendDoc(2, null, leaderJetty);
     // replica should publish itself as DOWN if the network is not healed after some amount time
-    waitForDownState(testCollectionName, notLeader.getName(), 10000);
+    waitForState(testCollectionName, notLeader.getName(), DOWN, 10000);
     
     proxy.reopen();
     leaderProxy.reopen();
@@ -394,7 +451,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
     attemptCollectionDelete(cloudClient, testCollectionName);
   }
 
-  private void waitForDownState(String collection, String replicaName, long ms) throws KeeperException, InterruptedException {
+  private void waitForState(String collection, String replicaName, Replica.State state, long ms) throws KeeperException, InterruptedException {
     TimeOut timeOut = new TimeOut(ms, TimeUnit.MILLISECONDS, TimeSource.CURRENT_TIME);
     Replica.State replicaState = Replica.State.ACTIVE;
     while (!timeOut.hasTimedOut()) {
@@ -405,10 +462,10 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
       Slice slice = slices.iterator().next();
       Replica partitionedReplica = slice.getReplica(replicaName);
       replicaState = partitionedReplica.getState();
-      if (replicaState == Replica.State.DOWN) return;
+      if (replicaState == state) return;
     }
-    assertEquals("The partitioned replica did not published it self as down",
-        Replica.State.DOWN, replicaState);
+    assertEquals("Timeout waiting for state "+ state +" of replica " + replicaName + ", current state " + replicaState,
+        state, replicaState);
   }
 
   protected void testRf3() throws Exception {