You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2015/01/30 17:56:24 UTC

svn commit: r1656056 - in /lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud: HttpPartitionTest.java ReplicationFactorTest.java

Author: markrmiller
Date: Fri Jan 30 16:56:24 2015
New Revision: 1656056

URL: http://svn.apache.org/r1656056
Log:
SOLR-6944: ReplicationFactorTest and HttpPartitionTest both fail with org.apache.http.NoHttpResponseException: The target server failed to respond

Modified:
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java?rev=1656056&r1=1656055&r2=1656056&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java Fri Jan 30 16:56:24 2015
@@ -17,14 +17,17 @@ package org.apache.solr.cloud;
  * limitations under the License.
  */
 
+import org.apache.http.NoHttpResponseException;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.JSONTestUtil;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
+import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.ClusterState;
@@ -41,6 +44,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -123,7 +127,7 @@ public class HttpPartitionTest extends A
   protected void testLeaderInitiatedRecoveryCRUD() throws Exception {
     String testCollectionName = "c8n_crud_1x2";
     String shardId = "shard1";
-    createCollection(testCollectionName, 1, 2, 1);
+    createCollectionRetry(testCollectionName, 1, 2, 1);
     cloudClient.setDefaultCollection(testCollectionName);
 
     Replica leader =
@@ -172,7 +176,7 @@ public class HttpPartitionTest extends A
   protected void testRf2() throws Exception {
     // create a collection that has 1 shard but 2 replicas
     String testCollectionName = "c8n_1x2";
-    createCollection(testCollectionName, 1, 2, 1);
+    createCollectionRetry(testCollectionName, 1, 2, 1);
     cloudClient.setDefaultCollection(testCollectionName);
     
     sendDoc(1);
@@ -253,11 +257,12 @@ public class HttpPartitionTest extends A
   protected void testRf3() throws Exception {
     // create a collection that has 1 shard but 2 replicas
     String testCollectionName = "c8n_1x3";
-    createCollection(testCollectionName, 1, 3, 1);
+    createCollectionRetry(testCollectionName, 1, 3, 1);
+    
     cloudClient.setDefaultCollection(testCollectionName);
     
     sendDoc(1);
-    
+
     List<Replica> notLeaders = 
         ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
     assertTrue("Expected 2 replicas for collection " + testCollectionName
@@ -306,11 +311,27 @@ public class HttpPartitionTest extends A
     }
   }
 
+  /** Creates the collection; if the response carries a "failure" entry, deletes it and creates it once more. */
+  private void createCollectionRetry(String testCollectionName, int numShards, int replicationFactor, int maxShardsPerNode)
+      throws SolrServerException, IOException {
+    CollectionAdminResponse firstTry = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
+    if (firstTry.getResponse().get("failure") == null) {
+      return; // no failure reported on the first attempt
+    }
+    CollectionAdminRequest.Delete cleanup = new CollectionAdminRequest.Delete();
+    cleanup.setCollectionName(testCollectionName);
+    cleanup.process(cloudClient); // delete the collection before the second attempt
+    CollectionAdminResponse secondTry = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
+    if (secondTry.getResponse().get("failure") != null) {
+      fail("Could not create " + testCollectionName);
+    }
+  }
+
   // test inspired by SOLR-6511
   protected void testLeaderZkSessionLoss() throws Exception {
 
     String testCollectionName = "c8n_1x2_leader_session_loss";
-    createCollection(testCollectionName, 1, 2, 1);
+    createCollectionRetry(testCollectionName, 1, 2, 1);
     cloudClient.setDefaultCollection(testCollectionName);
 
     sendDoc(1);
@@ -329,7 +350,7 @@ public class HttpPartitionTest extends A
         testCollectionName+"; clusterState: "+printClusterStateInfo(testCollectionName), leader);
     JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
 
-    HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName);
+
     SolrInputDocument doc = new SolrInputDocument();
     doc.addField(id, String.valueOf(2));
     doc.addField("a_t", "hello" + 2);
@@ -360,7 +381,8 @@ public class HttpPartitionTest extends A
     // TODO: This test logic seems to be timing dependent and fails on Jenkins
     // need to come up with a better approach
     log.info("Sending doc 2 to old leader "+leader.getName());
-    try {
+    try ( HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
+    
       leaderSolr.add(doc);
       leaderSolr.close();
 
@@ -374,7 +396,7 @@ public class HttpPartitionTest extends A
       try (HttpSolrClient client = getHttpSolrClient(currentLeader, testCollectionName)) {
         client.add(doc); // this should work
       }
-    }
+    } 
 
     List<Replica> participatingReplicas = getActiveOrRecoveringReplicas(testCollectionName, "shard1");
     Set<String> replicasToCheck = new HashSet<>();
@@ -452,17 +474,37 @@ public class HttpPartitionTest extends A
     return new HttpSolrClient(url);
   }
   
-  protected void sendDoc(int docId) throws Exception {
+  protected void doSendDoc(int docid) throws Exception {
     UpdateRequest up = new UpdateRequest();
     up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(2));
     SolrInputDocument doc = new SolrInputDocument();
-    doc.addField(id, String.valueOf(docId));
-    doc.addField("a_t", "hello" + docId);
+    doc.addField(id, String.valueOf(docid));
+    doc.addField("a_t", "hello" + docid);
     up.add(doc);
     int minAchievedRf =
         cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), cloudClient.request(up));
   }
   
+  /** Sends the doc; when the root cause is a NoHttpResponseException it retries after 100 ms and
+   *  again after 3 s. Any other SolrServerException is rethrown instead of being silently dropped. */
+  protected void sendDoc(int docId) throws Exception {
+    long[] retryDelaysMs = {100, 3000};
+    for (long delayMs : retryDelaysMs) {
+      try {
+        doSendDoc(docId);
+        return;
+      } catch (SolrServerException e) {
+        if (!(e.getRootCause() instanceof NoHttpResponseException)) {
+          throw e; // unrelated failure: surface it rather than letting the test continue
+        }
+        // we don't know if the doc was accepted or not, so wait and send again
+        Thread.sleep(delayMs);
+      }
+    }
+    // third and final attempt: whatever it throws reaches the caller
+    doSendDoc(docId);
+  }
+   
   /**
    * Query the real-time get handler for a specific doc by ID to verify it
    * exists in the provided server, using distrib=false so it doesn't route to another replica.

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java?rev=1656056&r1=1656055&r2=1656056&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java Fri Jan 30 16:56:24 2015
@@ -17,11 +17,19 @@ package org.apache.solr.cloud;
  * limitations under the License.
  */
 
+import java.io.File;
+import java.net.ServerSocket;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
@@ -30,12 +38,6 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.net.ServerSocket;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-
 //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-6157")
 
 /**
@@ -128,7 +130,20 @@ public class ReplicationFactorTest exten
     String shardId = "shard1";
     int minRf = 2;
     
-    createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
+    CollectionAdminResponse resp = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
+    
+    if (resp.getResponse().get("failure") != null) {
+      CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete();
+      req.setCollectionName(testCollectionName);
+      req.process(cloudClient);
+      
+      resp = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
+      
+      if (resp.getResponse().get("failure") != null) {
+        fail("Could not create " + testCollectionName);
+      }
+    }
+    
     cloudClient.setDefaultCollection(testCollectionName);
     
     List<Replica> replicas = 
@@ -149,8 +164,8 @@ public class ReplicationFactorTest exten
     up.add(batch);
 
     Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, shardId);
-    sendNonDirectUpdateRequestReplica(leader, up, 2, testCollectionName);    
-    sendNonDirectUpdateRequestReplica(replicas.get(0), up, 2, testCollectionName);    
+    sendNonDirectUpdateRequestReplicaWithRetry(leader, up, 2, testCollectionName);    
+    sendNonDirectUpdateRequestReplicaWithRetry(replicas.get(0), up, 2, testCollectionName);    
     
     // so now kill the replica of shard2 and verify the achieved rf is only 1
     List<Replica> shard2Replicas = 
@@ -162,8 +177,8 @@ public class ReplicationFactorTest exten
     Thread.sleep(2000);
     
     // shard1 will have rf=2 but shard2 will only have rf=1
-    sendNonDirectUpdateRequestReplica(leader, up, 1, testCollectionName);    
-    sendNonDirectUpdateRequestReplica(replicas.get(0), up, 1, testCollectionName);
+    sendNonDirectUpdateRequestReplicaWithRetry(leader, up, 1, testCollectionName);    
+    sendNonDirectUpdateRequestReplicaWithRetry(replicas.get(0), up, 1, testCollectionName);
     
     // heal the partition
     getProxyForReplica(shard2Replicas.get(0)).reopen();
@@ -171,6 +186,15 @@ public class ReplicationFactorTest exten
     Thread.sleep(2000);
   }
   
+  /** Calls sendNonDirectUpdateRequestReplica; if that throws any Exception, the error is discarded and the same call is made once more. */
+  protected void sendNonDirectUpdateRequestReplicaWithRetry(Replica replica, UpdateRequest up, int expectedRf, String collection) throws Exception {
+    try {
+      sendNonDirectUpdateRequestReplica(replica, up, expectedRf, collection);
+    } catch (Exception e) {
+      sendNonDirectUpdateRequestReplica(replica, up, expectedRf, collection);
+    }
+  }
+  
   @SuppressWarnings("rawtypes")
   protected void sendNonDirectUpdateRequestReplica(Replica replica, UpdateRequest up, int expectedRf, String collection) throws Exception {