You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2015/09/15 22:39:14 UTC
svn commit: r1703289 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
Author: anshum
Date: Tue Sep 15 20:39:14 2015
New Revision: 1703289
URL: http://svn.apache.org/r1703289
Log:
SOLR-8034: Leader no longer puts replicas in recovery in case of a failed update, when minRF isn't achieved.
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1703289&r1=1703288&r2=1703289&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Sep 15 20:39:14 2015
@@ -264,6 +264,9 @@ Other Changes
* SOLR-7999: SolrRequestParser tests no longer depend on external URLs
that may fail to work. (Uwe Schindler)
+* SOLR-8034: Leader no longer puts replicas in recovery in case of a failed update, when minRF
+ isn't achieved. (Jessica Cheng, Timothy Potter, Anshum Gupta)
+
================== 5.3.1 ==================
Bug Fixes
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1703289&r1=1703288&r2=1703289&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Tue Sep 15 20:39:14 2015
@@ -870,6 +870,11 @@ public class DistributedUpdateProcessor
}
}
+ // If the client specified a minRf and it was not achieved, don't request recovery; let the client retry instead
+ if (replicationTracker != null && replicationTracker.getAchievedRf() < replicationTracker.minRf) {
+ continue;
+ }
+
if (cloudDesc.getCoreNodeName().equals(leaderCoreNodeName) && foundErrorNodeInReplicaList) {
try {
// if false, then the node is probably not "live" anymore
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java?rev=1703289&r1=1703288&r2=1703289&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java Tue Sep 15 20:39:14 2015
@@ -25,6 +25,7 @@ import org.apache.solr.client.solrj.embe
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -38,7 +39,6 @@ import org.apache.solr.common.util.Named
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.servlet.SolrDispatchFilter;
-import org.apache.solr.update.UpdateHandler;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.RTimer;
import org.junit.Test;
@@ -50,7 +50,6 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -99,6 +98,12 @@ public class HttpPartitionTest extends A
testLeaderInitiatedRecoveryCRUD();
+ // Tests that if we set a minRf that's not satisfied, no recovery is requested, but if minRf is satisfied,
+ // recovery is requested
+ testMinRf();
+
+ waitForThingsToLevelOut(30000);
+
// test a 1x2 collection
testRf2();
@@ -164,6 +169,90 @@ public class HttpPartitionTest extends A
}
}
+ protected void testMinRf() throws Exception {
+ // create a collection that has 1 shard and 3 replicas
+ String testCollectionName = "collMinRf_1x3";
+ createCollection(testCollectionName, 1, 3, 1);
+ cloudClient.setDefaultCollection(testCollectionName);
+
+ sendDoc(1, 2);
+
+ List<Replica> notLeaders =
+ ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
+ assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName
+ + " but found " + notLeaders.size() + "; clusterState: "
+ + printClusterStateInfo(testCollectionName),
+ notLeaders.size() == 2);
+
+ assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
+
+ // Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
+ SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
+
+ proxy0.close();
+
+ // indexing during a partition
+ int achievedRf = sendDoc(2, 2);
+ assertEquals("Unexpected achieved replication factor", 2, achievedRf);
+
+ Thread.sleep(sleepMsBeforeHealPartition);
+
+ // Verify that the partitioned replica is DOWN
+ ZkStateReader zkr = cloudClient.getZkStateReader();
+ zkr.updateClusterState(); // force the state to be fresh
+ ClusterState cs = zkr.getClusterState();
+ Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
+ Slice slice = slices.iterator().next();
+ Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
+ assertEquals("The partitioned replica did not get marked down",
+ Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
+
+ proxy0.reopen();
+
+ notLeaders =
+ ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
+
+ // Since minRf is achieved, we expect recovery, so we expect to see 2 documents
+ assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
+
+ // Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
+ proxy0 = getProxyForReplica(notLeaders.get(0));
+ proxy0.close();
+ SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
+ proxy1.close();
+
+ achievedRf = sendDoc(3, 2);
+ assertEquals("Unexpected achieved replication factor", 1, achievedRf);
+
+ Thread.sleep(sleepMsBeforeHealPartition);
+
+ // Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
+ ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);
+
+ proxy0.reopen();
+ proxy1.reopen();
+
+ notLeaders =
+ ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
+
+ // Check that doc 3 is on the leader but not on the notLeaders
+ Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
+ HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName);
+ assertDocExists(leaderSolr, testCollectionName, "3");
+
+ for (Replica notLeader : notLeaders) {
+ HttpSolrClient notLeaderSolr = getHttpSolrClient(notLeader, testCollectionName);
+ assertDocNotExists(notLeaderSolr, testCollectionName, "3");
+ }
+
+ // Retry sending doc 3
+ achievedRf = sendDoc(3, 2);
+ assertEquals("Unexpected achieved replication factor", 3, achievedRf);
+
+ // Now doc 3 should be on all replicas
+ assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
+ }
+
protected void testRf2() throws Exception {
// create a collection that has 1 shard but 2 replicas
String testCollectionName = "c8n_1x2";
@@ -480,19 +569,29 @@ public class HttpPartitionTest extends A
}
}
}
-
+
protected HttpSolrClient getHttpSolrClient(Replica replica, String coll) throws Exception {
ZkCoreNodeProps zkProps = new ZkCoreNodeProps(replica);
String url = zkProps.getBaseUrl() + "/" + coll;
return new HttpSolrClient(url);
}
- protected void sendDoc(int docId) throws Exception {
+ protected int sendDoc(int docId) throws Exception {
+ return sendDoc(docId, null);
+ }
+
+ protected int sendDoc(int docId, Integer minRf) throws Exception {
SolrInputDocument doc = new SolrInputDocument();
doc.addField(id, String.valueOf(docId));
doc.addField("a_t", "hello" + docId);
- sendDocsWithRetry(Collections.singletonList(doc), 2, 3, 100);
+ UpdateRequest up = new UpdateRequest();
+ if (minRf != null) {
+ up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(minRf));
+ }
+ up.add(doc);
+
+ return cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), cloudClient.request(up));
}
/**
@@ -501,13 +600,24 @@ public class HttpPartitionTest extends A
*/
@SuppressWarnings("rawtypes")
protected void assertDocExists(HttpSolrClient solr, String coll, String docId) throws Exception {
- QueryRequest qr = new QueryRequest(params("qt", "/get", "id", docId, "distrib", "false"));
- NamedList rsp = solr.request(qr);
+ NamedList rsp = realTimeGetDocId(solr, docId);
String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), new Integer(docId));
assertTrue("Doc with id=" + docId + " not found in " + solr.getBaseURL()
+ " due to: " + match + "; rsp="+rsp, match == null);
}
+ protected void assertDocNotExists(HttpSolrClient solr, String coll, String docId) throws Exception {
+ NamedList rsp = realTimeGetDocId(solr, docId);
+ String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), new Integer(docId));
+ assertTrue("Doc with id=" + docId + " is found in " + solr.getBaseURL()
+ + " due to: " + match + "; rsp="+rsp, match != null);
+ }
+
+ private NamedList realTimeGetDocId(HttpSolrClient solr, String docId) throws SolrServerException, IOException {
+ QueryRequest qr = new QueryRequest(params("qt", "/get", "id", docId, "distrib", "false"));
+ return solr.request(qr);
+ }
+
protected int getReplicaPort(Replica replica) {
String replicaNode = replica.getNodeName();
String tmp = replicaNode.substring(replicaNode.indexOf(':')+1);