Posted to commits@lucene.apache.org by tf...@apache.org on 2017/05/18 01:00:23 UTC

[1/2] lucene-solr:jira/solr-10233: Removed some TODOs and nocommits. Some improvements to tests and other minor changes

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-10233 caad85c93 -> 30e305ef5


Removed some TODOs and nocommits. Some improvements to tests and other minor changes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a8bac800
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a8bac800
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a8bac800

Branch: refs/heads/jira/solr-10233
Commit: a8bac800a30d8434208e33912e388b0fd9547818
Parents: caad85c9
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Wed May 17 16:08:04 2017 -0700
Committer: Tomas Fernandez Lobbe <tf...@apache.org>
Committed: Wed May 17 16:08:04 2017 -0700

----------------------------------------------------------------------
 .../org/apache/solr/cloud/ZkController.java     |  2 -
 .../org/apache/solr/handler/IndexFetcher.java   |  8 +-
 .../apache/solr/handler/RealTimeGetHandler.java |  4 +-
 .../apache/solr/handler/ReplicationHandler.java |  2 +-
 .../handler/component/HttpShardHandler.java     | 12 ++-
 .../handler/component/RealTimeGetComponent.java |  2 +-
 .../org/apache/solr/update/UpdateHandler.java   |  2 +-
 .../processor/DistributedUpdateProcessor.java   | 11 ++-
 .../org/apache/solr/util/TestInjection.java     |  5 +-
 .../solr/collection1/conf/solrconfig.xml        |  8 +-
 ...MonkeyNothingIsSafeWithPullReplicasTest.java | 17 +---
 ...aosMonkeySafeLeaderWithPullReplicasTest.java |  5 +-
 .../org/apache/solr/cloud/ShardSplitTest.java   |  5 --
 .../org/apache/solr/cloud/TestPullReplica.java  |  6 +-
 .../org/apache/solr/cloud/TestTlogReplica.java  | 83 ++++++++++++--------
 .../solrj/request/CollectionAdminRequest.java   |  2 +-
 .../apache/solr/common/cloud/ZkStateReader.java |  3 +-
 .../cloud/AbstractFullDistribZkTestBase.java    | 11 +++
 18 files changed, 115 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 53d6747..cb8175e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1302,8 +1302,6 @@ public class ZkController {
         context.cancelElection();
       }
     }
-//    //TODO: Do we need to stop replication for type==tlog?
-
     CloudDescriptor cloudDescriptor = cd.getCloudDescriptor();
     zkStateReader.unregisterCore(cloudDescriptor.getCollectionName());
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index 84b36b5..7d15701 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -178,11 +178,13 @@ public class IndexFetcher {
     public static final IndexFetchResult INDEX_FETCH_FAILURE = new IndexFetchResult("Fetching lastest index is failed", false, null);
     public static final IndexFetchResult INDEX_FETCH_SUCCESS = new IndexFetchResult("Fetching latest index is successful", true, null);
     public static final IndexFetchResult LOCK_OBTAIN_FAILED = new IndexFetchResult("Obtaining SnapPuller lock failed", false, null);
+    public static final IndexFetchResult CONTAINER_IS_SHUTTING_DOWN = new IndexFetchResult("I was asked to replicate but CoreContainer is shutting down", false, null);
     public static final IndexFetchResult MASTER_VERSION_ZERO = new IndexFetchResult("Index in peer is empty and never committed yet", true, null);
     public static final IndexFetchResult NO_INDEX_COMMIT_EXIST = new IndexFetchResult("No IndexCommit in local index", false, null);
     public static final IndexFetchResult PEER_INDEX_COMMIT_DELETED = new IndexFetchResult("No files to download because IndexCommit in peer was deleted", false, null);
     public static final IndexFetchResult LOCAL_ACTIVITY_DURING_REPLICATION = new IndexFetchResult("Local index modification during replication", false, null);
     public static final IndexFetchResult EXPECTING_NON_LEADER = new IndexFetchResult("Replicating from leader but I'm the shard leader", false, null);
+    public static final IndexFetchResult LEADER_IS_NOT_ACTIVE = new IndexFetchResult("Replicating from leader but leader is not active", false, null);
 
     IndexFetchResult(String message, boolean successful, Throwable exception) {
       this.message = message;
@@ -365,7 +367,11 @@ public class IndexFetcher {
         }
         if (replica.getState() != Replica.State.ACTIVE) {
           LOG.info("Replica {} is leader but it's state is {}, skipping replication", replica.getName(), replica.getState());
-          return IndexFetchResult.EXPECTING_NON_LEADER;//nocommit: not the correct error
+          return IndexFetchResult.LEADER_IS_NOT_ACTIVE;
+        }
+        if (!solrCore.getCoreContainer().getZkController().getClusterState().liveNodesContain(replica.getNodeName())) {
+          LOG.info("Replica {} is leader but it's not hosted on a live node, skipping replication", replica.getName());
+          return IndexFetchResult.LEADER_IS_NOT_ACTIVE;
         }
         if (!replica.getCoreUrl().equals(masterUrl)) {
           masterUrl = replica.getCoreUrl();

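For reference, the added constants back two new guards: before fetching, the follower now verifies that the shard leader is in state ACTIVE and hosted on a live node, returning LEADER_IS_NOT_ACTIVE otherwise (CONTAINER_IS_SHUTTING_DOWN is returned from ReplicationHandler, further down). A minimal sketch of that decision order; the standalone helper method below is illustrative only, not part of the commit:

    // Illustrative helper summarizing the leader checks added above (assumes it lives in IndexFetcher,
    // where the solrCore field and LOG are available).
    private IndexFetchResult checkLeaderEligible(Replica leader) {
      if (leader.getState() != Replica.State.ACTIVE) {
        LOG.info("Replica {} is leader but its state is {}, skipping replication", leader.getName(), leader.getState());
        return IndexFetchResult.LEADER_IS_NOT_ACTIVE;
      }
      ClusterState clusterState = solrCore.getCoreContainer().getZkController().getClusterState();
      if (!clusterState.liveNodesContain(leader.getNodeName())) {
        LOG.info("Replica {} is leader but it's not hosted on a live node, skipping replication", leader.getName());
        return IndexFetchResult.LEADER_IS_NOT_ACTIVE;
      }
      return null; // null means the leader is usable and replication can proceed from leader.getCoreUrl()
    }
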
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/handler/RealTimeGetHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/RealTimeGetHandler.java b/solr/core/src/java/org/apache/solr/handler/RealTimeGetHandler.java
index 247b65c..9f2b693 100644
--- a/solr/core/src/java/org/apache/solr/handler/RealTimeGetHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/RealTimeGetHandler.java
@@ -22,6 +22,7 @@ import java.util.List;
 
 import org.apache.solr.api.Api;
 import org.apache.solr.api.ApiBag;
+import org.apache.solr.handler.component.HttpShardHandler;
 import org.apache.solr.handler.component.RealTimeGetComponent;
 import org.apache.solr.handler.component.SearchHandler;
 import org.apache.solr.request.SolrQueryRequest;
@@ -40,7 +41,8 @@ public class RealTimeGetHandler extends SearchHandler {
   
   @Override
   public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
-    req.getContext().put("distribOnlyRealtime", Boolean.TRUE);
+    // Tell HttpShardHandler that this request should only be distributed to NRT replicas
+    req.getContext().put(HttpShardHandler.ONLY_NRT_REPLICAS, Boolean.TRUE);
     super.handleRequestBody(req, rsp);
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index 90d33be..f3dcdeb 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -405,7 +405,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
       return IndexFetchResult.LOCK_OBTAIN_FAILED;
     if (core.getCoreContainer().isShutDown()) {
       LOG.warn("I was asked to replicate but CoreContainer is shutting down");
-      return IndexFetchResult.LOCK_OBTAIN_FAILED;//nocommit: different 
+      return IndexFetchResult.CONTAINER_IS_SHUTTING_DOWN; 
     }
     try {
       if (masterUrl != null) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
index a2108be..4ec3b79 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
@@ -60,6 +60,14 @@ import org.slf4j.LoggerFactory;
 import org.slf4j.MDC;
 
 public class HttpShardHandler extends ShardHandler {
+  
+  /**
+   * If the request context map has an entry with this key and Boolean.TRUE as value,
+   * {@link #prepDistributed(ResponseBuilder)} will only include {@link org.apache.solr.common.cloud.Replica.Type#NRT} replicas as possible
+   * destinations of the distributed request (or a leader replica of type {@link org.apache.solr.common.cloud.Replica.Type#TLOG}). This is used
+   * by the RealTimeGet handler, since other types of replicas shouldn't respond to RTG requests.
+   */
+  public static String ONLY_NRT_REPLICAS = "distribOnlyRealtime";
 
   private HttpShardHandlerFactory httpShardHandlerFactory;
   private CompletionService<ShardResponse> completionService;
@@ -349,8 +357,8 @@ public class HttpShardHandler extends ShardHandler {
       // and make it a non-distributed request.
       String ourSlice = cloudDescriptor.getShardId();
       String ourCollection = cloudDescriptor.getCollectionName();
-      // Some requests may only be fulfilled by replicas of type Replica.Type.REALTIME
-      boolean onlyNrtReplicas = Boolean.TRUE == req.getContext().get("distribOnlyRealtime");
+      // Some requests may only be fulfilled by replicas of type Replica.Type.NRT
+      boolean onlyNrtReplicas = Boolean.TRUE == req.getContext().get(ONLY_NRT_REPLICAS);
       if (rb.slices.length == 1 && rb.slices[0] != null
           && ( rb.slices[0].equals(ourSlice) || rb.slices[0].equals(ourCollection + "_" + ourSlice) )  // handle the <collection>_<slice> format
           && cloudDescriptor.getLastPublished() == Replica.State.ACTIVE

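As a usage sketch, any handler that extends SearchHandler can opt its distributed requests into the same NRT-only routing RealTimeGetHandler uses, by setting the new context key before delegating to super. The handler class below is hypothetical, not part of the commit:

    // Hypothetical handler, for illustration only: restricts distributed fan-out to NRT replicas
    // (or TLOG leaders), the same way RealTimeGetHandler does in the hunk above.
    public class NrtOnlyHandler extends SearchHandler {
      @Override
      public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        req.getContext().put(HttpShardHandler.ONLY_NRT_REPLICAS, Boolean.TRUE);
        super.handleRequestBody(req, rsp);
      }
    }
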
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 6760705..6d70435 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -117,7 +117,7 @@ public class RealTimeGetComponent extends SearchComponent
                   cloudDesc.getCoreNodeName(),
                   Replica.Type.PULL));
         } 
-        // non-leader APPEND replicas should not respond to distrib /get requests, but internal requests are OK
+        // non-leader TLOG replicas should not respond to distrib /get requests, but internal requests are OK
       }
     }
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
index 42abaf8..f0eb8bc 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java
@@ -126,7 +126,7 @@ public abstract class UpdateHandler implements SolrInfoBean {
 
     // If this is a replica of type PULL, don't create the update log
     boolean skipUpdateLog = core.getCoreDescriptor().getCloudDescriptor() != null && !core.getCoreDescriptor().getCloudDescriptor().requiresTransactionLog();
-    if (updateLog == null && ulogPluginInfo != null && !skipUpdateLog) {
+    if (updateLog == null && ulogPluginInfo != null && ulogPluginInfo.isEnabled() && !skipUpdateLog) {
       String dataDir = (String)ulogPluginInfo.initArgs.get("dir");
 
       String ulogDir = core.getCoreDescriptor().getUlogDir();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
index e67f982..a91831c 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@@ -1609,7 +1609,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
           String myShardId = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
           Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(
               collection, myShardId);
-          // DBQ forwarded to Realtime and Append
+          // DBQ forwarded to NRT and TLOG replicas
           List<ZkCoreNodeProps> replicaProps = zkController.getZkStateReader()
               .getReplicaProps(collection, myShardId, leaderReplica.getName(), null, Replica.State.DOWN, EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
           if (replicaProps != null) {
@@ -1700,7 +1700,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
           }
 
           if (replicaType == Replica.Type.TLOG && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) {
-            // Append replica not leader, don't write the DBQ to IW
+            // TLOG replica not leader, don't write the DBQ to IW
             cmd.setFlags(cmd.getFlags() | UpdateCommand.IGNORE_INDEXWRITER);
           }
           doLocalDelete(cmd);
@@ -1896,7 +1896,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     }
     
     if (!zkEnabled || req.getParams().getBool(COMMIT_END_POINT, false) || singleLeader) {
-      if (replicaType == Replica.Type.TLOG) { // REALTIME will always commit
+      if (replicaType == Replica.Type.TLOG) {
         try {
           Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(
               collection, cloudDesc.getShardId());
@@ -1914,7 +1914,12 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         } catch (InterruptedException e) {
           throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + cloudDesc.getShardId(), e);
         }
+      } else if (replicaType == Replica.Type.PULL) {
+        log.warn("Commit not supported on replicas of type " + Replica.Type.PULL);
       } else {
+        // NRT replicas will always commit
+        long commitVersion = vinfo.getNewClock();
+        cmd.setVersion(commitVersion);
         doLocalCommit(cmd);
       }
     } else {

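Condensing the commit path above: behavior now branches on the local replica's type. A simplified restatement of that branch, for orientation only (the leader lookup and forwarding around the TLOG case are elided; see the hunk above):

    // Simplified view of the local-commit branch above; not the full method.
    if (replicaType == Replica.Type.TLOG) {
      // TLOG replicas first resolve the shard leader before deciding how to apply the commit
      // (leader lookup elided in this sketch; see the hunk above).
    } else if (replicaType == Replica.Type.PULL) {
      // PULL replicas only pull segments from the leader, so a local commit is meaningless here.
      log.warn("Commit not supported on replicas of type " + Replica.Type.PULL);
    } else {
      // NRT replicas always commit locally, stamping the commit with a fresh version.
      long commitVersion = vinfo.getNewClock();
      cmd.setVersion(commitVersion);
      doLocalCommit(cmd);
    }
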
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/java/org/apache/solr/util/TestInjection.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/TestInjection.java b/solr/core/src/java/org/apache/solr/util/TestInjection.java
index 7b30151..656428b 100644
--- a/solr/core/src/java/org/apache/solr/util/TestInjection.java
+++ b/solr/core/src/java/org/apache/solr/util/TestInjection.java
@@ -385,13 +385,14 @@ public class TestInjection {
           long leaderVersion = (long) ((NamedList)response.get("details")).get("indexVersion");
           RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
           try {
-            String localVersion = searcher.get().getIndexReader().getIndexCommit().getUserData().get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
+//            String localVersion = searcher.get().getIndexReader().getIndexCommit().getUserData().get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
+            String localVersion = searcher.get().getIndexReader().getIndexCommit().getUserData().get(SolrIndexWriter.COMMIT_COMMAND_VERSION);
             if (localVersion == null && leaderVersion == 0 && !core.getUpdateHandler().getUpdateLog().hasUncommittedChanges()) return true;
             if (localVersion != null && Long.parseLong(localVersion) == leaderVersion && (leaderVersion >= t || i >= 6)) {
               log.info("Waiting time for tlog replica to be in sync with leader: {}", System.currentTimeMillis()-currentTime);
               return true;
             } else {
-              log.debug("Append replica not in sync with leader yet. Attempt: {}", i);
+              log.debug("Tlog replica not in sync with leader yet. Attempt: {}. Local Version={}, leader Version={}", i, localVersion, leaderVersion);
               Thread.sleep(500);
             }
           } finally {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
index 58f9551..a63f6cb 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -63,6 +63,10 @@
 
   <updateHandler class="solr.DirectUpdateHandler2">
 
+    <autoCommit>
+      <maxTime>${solr.autoCommit.maxTime:-1}</maxTime>
+    </autoCommit>
+
     <!-- autocommit pending docs if certain criteria are met
     <autoCommit>
       <maxDocs>10000</maxDocs>
@@ -478,7 +482,7 @@
       <str name="facet.query">foo_s:bar</str>
     </lst>
   </requestHandler>
-
+  
   <admin>
     <defaultQuery>solr</defaultQuery>
     <gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles>
@@ -577,6 +581,8 @@
       <str name="df">text</str>
     </lst>
   </initParams>
+  
+  
 
 </config>
 

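The new autoCommit block relies on Solr's ${property:default} substitution: solr.autoCommit.maxTime resolves from a system property and defaults to -1 (time-based auto-commit disabled) when the property is unset. The chaos-monkey tests further down therefore only opt in for some runs, e.g.:

    // In a test's @BeforeClass (as in the ChaosMonkey tests below): enable a 15s auto-commit
    // for some runs only; when the property is unset, the :-1 default leaves auto-commit off.
    if (usually()) {
      System.setProperty("solr.autoCommit.maxTime", "15000");
    }
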
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
index 8c4c781..37c96d7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
@@ -28,14 +28,12 @@ import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkStateReader;
-import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.TestInjection;
 import org.apache.solr.util.TimeOut;
 import org.junit.AfterClass;
@@ -69,7 +67,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
   @BeforeClass
   public static void beforeSuperClass() {
     schemaString = "schema15.xml";      // we need a string id
-    System.setProperty("solr.autoCommit.maxTime", "15000");
+    if (usually()) {
+      System.setProperty("solr.autoCommit.maxTime", "15000");
+    }
     TestInjection.waitForReplicasInSync = null;
     setErrorHook();
   }
@@ -299,17 +299,6 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
     }
   }
 
-  private void waitForAllWarmingSearchers() throws InterruptedException {
-    for (JettySolrRunner jetty:jettys) {
-      if (!jetty.isRunning()) {
-        continue;
-      }
-      for (SolrCore core:jetty.getCoreContainer().getCores()) {
-        waitForWarming(core);
-      }
-    }
-  }
-
   private Set<String> getAddFails(List<StoppableIndexingThread> threads) {
     Set<String> addFails = new HashSet<String>();
     for (StoppableIndexingThread thread : threads)   {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
index 62d77a6..5826eff 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
@@ -64,7 +64,9 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
   @BeforeClass
   public static void beforeSuperClass() {
     schemaString = "schema15.xml";      // we need a string id
-    System.setProperty("solr.autoCommit.maxTime", "15000");
+    if (usually()) {
+      System.setProperty("solr.autoCommit.maxTime", "15000");
+    }
     TestInjection.waitForReplicasInSync = null;
     setErrorHook();
   }
@@ -202,6 +204,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
     log.info("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
     
     waitForReplicationFromReplicas(DEFAULT_COLLECTION, cloudClient.getZkStateReader(), new TimeOut(30, TimeUnit.SECONDS));
+    waitForAllWarmingSearchers();
 
     checkShardConsistency(batchSize == 1, true);
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java b/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
index 73a0bf7..72f0694 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
@@ -79,11 +79,6 @@ public class ShardSplitTest extends BasicDistributedZkTest {
   public ShardSplitTest() {
     schemaString = "schema15.xml";      // we need a string id
   }
-  
-  @Override
-  protected boolean useTlogReplicas() {
-    return false;
-  }
 
   @Override
   public void distribSetUp() throws Exception {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
index fa0578b..95d62d9 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
@@ -387,7 +387,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     
     // Queries should still work
     waitForNumDocsInAllReplicas(1, docCollection.getReplicas(EnumSet.of(Replica.Type.PULL)));
-    // Add nrt replica back. Since there is no rt now, new rt will have no docs. There will be data loss, since the it will become the leader
+    // Add nrt replica back. Since there is no nrt now, the new nrt will have no docs. There will be data loss, since it will become the leader
     // and pull replicas will replicate from it. Maybe we want to change this. Replicate from pull replicas is not a good idea, since they
     // are by definition out of date.
     if (removeReplica) {
@@ -398,13 +398,13 @@ public class TestPullReplica extends SolrCloudTestCase {
     waitForState("Expected collection to be 1x2", collectionName, clusterShape(1, 2));
     unIgnoreException("No registered leader was found"); // Should have a leader from now on
 
-    // Validate that the new writer is the leader now
+    // Validate that the new nrt replica is the leader now
     cluster.getSolrClient().getZkStateReader().forceUpdateCollection(collectionName);
     docCollection = getCollectionState(collectionName);
     leader = docCollection.getSlice("shard1").getLeader();
     assertTrue(leader != null && leader.isActive(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes()));
 
-    //nocommit: If jetty is restarted, the replication is not forced, and replica doesn't replicate from leader until new docs are added. Is this the correct behavior? Why should these two cases be different?
+    // If jetty is restarted, the replication is not forced, and the replica doesn't replicate from the leader until new docs are added. Is this the correct behavior? Why should these two cases be different?
     if (removeReplica) {
       // Pull replicas will replicate the empty index if a new replica was added and becomes leader
       waitForNumDocsInAllReplicas(0, docCollection.getReplicas(EnumSet.of(Replica.Type.PULL)));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
index 57f25d0..d228140 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
@@ -147,25 +147,37 @@ public class TestTlogReplica extends SolrCloudTestCase {
       CollectionAdminRequest.createCollection(collectionName, "conf", 2, 0, 4, 0)
       .setMaxShardsPerNode(100)
       .process(cluster.getSolrClient());
-      DocCollection docCollection = getCollectionState(collectionName);
-      assertNotNull(docCollection);
-      assertEquals("Expecting 2 shards",
-          2, docCollection.getSlices().size());
-      assertEquals("Expecting 4 relpicas per shard",
-          8, docCollection.getReplicas().size());
-      assertEquals("Expecting 8 tlog replicas, 4 per shard",
-          8, docCollection.getReplicas(EnumSet.of(Replica.Type.TLOG)).size());
-      assertEquals("Expecting no nrt replicas",
-          0, docCollection.getReplicas(EnumSet.of(Replica.Type.NRT)).size());
-      assertEquals("Expecting no pull replicas",
-          0, docCollection.getReplicas(EnumSet.of(Replica.Type.PULL)).size());
-      for (Slice s:docCollection.getSlices()) {
-        assertTrue(s.getLeader().getType() == Replica.Type.TLOG);
-        List<String> shardElectionNodes = cluster.getZkClient().getChildren(ZkStateReader.getShardLeadersElectPath(collectionName, s.getName()), null, true);
-        assertEquals("Unexpected election nodes for Shard: " + s.getName() + ": " + Arrays.toString(shardElectionNodes.toArray()), 
-            4, shardElectionNodes.size());
+      boolean reloaded = false;
+      while (true) {
+        DocCollection docCollection = getCollectionState(collectionName);
+        assertNotNull(docCollection);
+        assertEquals("Expecting 2 shards",
+            2, docCollection.getSlices().size());
+        assertEquals("Expecting 4 replicas per shard",
+            8, docCollection.getReplicas().size());
+        assertEquals("Expecting 8 tlog replicas, 4 per shard",
+            8, docCollection.getReplicas(EnumSet.of(Replica.Type.TLOG)).size());
+        assertEquals("Expecting no nrt replicas",
+            0, docCollection.getReplicas(EnumSet.of(Replica.Type.NRT)).size());
+        assertEquals("Expecting no pull replicas",
+            0, docCollection.getReplicas(EnumSet.of(Replica.Type.PULL)).size());
+        for (Slice s:docCollection.getSlices()) {
+          assertTrue(s.getLeader().getType() == Replica.Type.TLOG);
+          List<String> shardElectionNodes = cluster.getZkClient().getChildren(ZkStateReader.getShardLeadersElectPath(collectionName, s.getName()), null, true);
+          assertEquals("Unexpected election nodes for Shard: " + s.getName() + ": " + Arrays.toString(shardElectionNodes.toArray()), 
+              4, shardElectionNodes.size());
+        }
+        assertUlogPresence(docCollection);
+        if (reloaded) {
+          break;
+        } else {
+          // reload
+          CollectionAdminResponse response = CollectionAdminRequest.reloadCollection(collectionName)
+          .process(cluster.getSolrClient());
+          assertEquals(0, response.getStatus());
+          reloaded = true;
+        }
       }
-      assertUlogPresence(docCollection);
     } finally {
       zkClient().printLayoutToStdOut();
     }
@@ -225,7 +237,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     
     waitForState("Expecting collection to have 2 shards and 2 replica each", collectionName, clusterShape(2, 2));
     
-    //Delete pull replica from shard1
+    //Delete tlog replica from shard1
     CollectionAdminRequest.deleteReplica(
         collectionName, 
         "shard1", 
@@ -360,8 +372,8 @@ public class TestTlogReplica extends SolrCloudTestCase {
     JettySolrRunner pullReplicaJetty = cluster.getReplicaJetty(docCollection.getSlice("shard1").getReplicas(EnumSet.of(Replica.Type.TLOG)).get(0));
     ChaosMonkey.kill(pullReplicaJetty);
     waitForState("Replica not removed", collectionName, activeReplicaCount(0, 1, 0));
-    // Also wait for the replica to be placed in state="down"
-    waitForState("Didn't update state", collectionName, clusterStateReflectsActiveAndDownReplicas());
+//    // Also wait for the replica to be placed in state="down"
+//    waitForState("Didn't update state", collectionName, clusterStateReflectsActiveAndDownReplicas());
     
     cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2", "foo", "bar"));
     cluster.getSolrClient().commit(collectionName);
@@ -436,6 +448,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     waitForReplicasCatchUp(20);
   }
   
+  @SuppressWarnings("unchecked")
   public void testRecovery() throws Exception {
     boolean useKill = random().nextBoolean();
     createAndWaitForCollection(1, 0, 2, 0);
@@ -445,7 +458,6 @@ public class TestTlogReplica extends SolrCloudTestCase {
         .add(sdoc("id", "3"))
         .add(sdoc("id", "4"))
         .commit(cloudClient, collectionName);
-    // Replica recovery
     new UpdateRequest()
         .add(sdoc("id", "5"))
         .process(cloudClient, collectionName);
@@ -455,30 +467,37 @@ public class TestTlogReplica extends SolrCloudTestCase {
     } else {
       ChaosMonkey.stop(solrRunner);
     }
+    waitForState("Replica still up", collectionName, activeReplicaCount(0,1,0));
     new UpdateRequest()
         .add(sdoc("id", "6"))
         .process(cloudClient, collectionName);
     ChaosMonkey.start(solrRunner);
     waitForState("Replica didn't recover", collectionName, activeReplicaCount(0,2,0));
     // We skip peerSync, so replica will always trigger commit on leader
-    waitForNumDocsInAllActiveReplicas(4);
-    
-    // If I add the doc immediately, the leader fails to communicate with the follower with broken pipe. Related to SOLR-9555 I believe
-    //nocommit
-    Thread.sleep(10000);
+//    waitForNumDocsInAllActiveReplicas(4);
+    waitForNumDocsInAllReplicas(4, getCollectionState(collectionName).getReplicas(), 0);// Should be immediate
     
-    // More Replica recovery testing
-    new UpdateRequest()
-        .add(sdoc("id", "7"))
-        .process(cloudClient, collectionName);
+    // If I add the doc immediately, the leader fails to communicate with the follower with broken pipe.
+    // Options are to wait or retry...
+    for (int i = 0; i < 3; i++) {
+      UpdateRequest ureq = new UpdateRequest().add(sdoc("id", "7"));
+      ureq.setParam("collection", collectionName);
+      ureq.setParam(UpdateRequest.MIN_REPFACT, "2");
+      NamedList<Object> response = cloudClient.request(ureq);
+      if ((Integer)((NamedList<Object>)response.get("responseHeader")).get(UpdateRequest.REPFACT) >= 2) {
+        break;
+      }
+      LOG.info("Min RF not achieved yet. retrying");
+    }
     checkRTG(3,7, cluster.getJettySolrRunners());
     DirectUpdateHandler2.commitOnClose = false;
     ChaosMonkey.stop(solrRunner);
+    waitForState("Replica still up", collectionName, activeReplicaCount(0,1,0));
     DirectUpdateHandler2.commitOnClose = true;
     ChaosMonkey.start(solrRunner);
     waitForState("Replica didn't recover", collectionName, activeReplicaCount(0,2,0));
     checkRTG(3,7, cluster.getJettySolrRunners());
-    waitForNumDocsInAllActiveReplicas(5, 0);
+    waitForNumDocsInAllReplicas(5, getCollectionState(collectionName).getReplicas(), 0);// Should be immediate
 
     // Test replica recovery apply buffer updates
     Semaphore waitingForBufferUpdates = new Semaphore(0);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
index dcfed27..cc51a29 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
@@ -338,7 +338,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
    * @param collection the collection name
    * @param config     the collection config
    * @param numShards  the number of shards in the collection
-   * @param numReplicas the replication factor of the collection
+   * @param numReplicas the replication factor of the collection (same as numNrtReplicas)
    */
   public static Create createCollection(String collection, String config, int numShards, int numReplicas) {
     return new Create(collection, config, numShards, numReplicas, 0, 0);

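For context, the clarified javadoc reflects that the 4-arg factory only creates NRT replicas; callers that want TLOG or PULL replicas use the overload that takes per-type counts, as the tests in this commit do. A brief usage sketch (collection name, config name and client are placeholders):

    // numReplicas in the 4-arg factory is the NRT replica count (the replication factor).
    CollectionAdminRequest.createCollection("myCollection", "conf", 2, 2)
        .process(solrClient);

    // Explicit per-type counts: 2 shards, each with 1 NRT, 1 TLOG and 2 PULL replicas.
    CollectionAdminRequest.createCollection("myCollection", "conf", 2, 1, 1, 2)
        .setMaxShardsPerNode(100)
        .process(solrClient);
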
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index 3c3497a..158b53a 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -98,7 +98,6 @@ public class ZkStateReader implements Closeable {
   public static final String MAX_SHARDS_PER_NODE = "maxShardsPerNode";
   public static final String AUTO_ADD_REPLICAS = "autoAddReplicas";
   public static final String MAX_CORES_PER_NODE = "maxCoresPerNode";
-  //TODO: Move these constants out of ZkStateReader
   public static final String PULL_REPLICAS = "pullReplicas";
   public static final String NRT_REPLICAS = "nrtReplicas";
   public static final String TLOG_REPLICAS = "tlogReplicas";
@@ -787,7 +786,7 @@ public class ZkStateReader implements Closeable {
   
   public List<ZkCoreNodeProps> getReplicaProps(String collection, String shardId, String thisCoreNodeName,
       Replica.State mustMatchStateFilter, Replica.State mustNotMatchStateFilter) {
-    //nocommit: We don't need all these getReplicaProps method overloading. Also, it's odd that the default is to return replicas of type APPEND and REALTIME only
+    //TODO: We don't need all these getReplicaProps method overloads. Also, it's odd that the default is to return replicas of type TLOG and NRT only
     return getReplicaProps(collection, shardId, thisCoreNodeName, mustMatchStateFilter, null, EnumSet.of(Replica.Type.TLOG,  Replica.Type.NRT));
   }
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a8bac800/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
index f30c035..409e04a 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
@@ -2103,6 +2103,17 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
       }
     }
   }
+  
+  protected void waitForAllWarmingSearchers() throws InterruptedException {
+    for (JettySolrRunner jetty:jettys) {
+      if (!jetty.isRunning()) {
+        continue;
+      }
+      for (SolrCore core:jetty.getCoreContainer().getCores()) {
+        waitForWarming(core);
+      }
+    }
+  }
 
   protected long getIndexVersion(Replica replica) throws IOException {
     try (HttpSolrClient client = new HttpSolrClient.Builder(replica.getCoreUrl()).build()) {


[2/2] lucene-solr:jira/solr-10233: Always distrib commits on PULL replicas

Posted by tf...@apache.org.
Always distrib commits on PULL replicas


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/30e305ef
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/30e305ef
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/30e305ef

Branch: refs/heads/jira/solr-10233
Commit: 30e305ef5bdc38d0c45b5c44d29084259574d17e
Parents: a8bac80
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Wed May 17 17:13:32 2017 -0700
Committer: Tomas Fernandez Lobbe <tf...@apache.org>
Committed: Wed May 17 17:13:32 2017 -0700

----------------------------------------------------------------------
 .../solr/update/processor/DistributedUpdateProcessor.java       | 2 +-
 solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java   | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/30e305ef/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
index a91831c..993f387 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@@ -1890,7 +1890,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, 
             "Unable to distribute commit operation. No replicas available of types " + Replica.Type.TLOG + " or " + Replica.Type.NRT);
       }
-      if (isLeader && nodes.size() == 1) {
+      if (isLeader && nodes.size() == 1 && req.getCore().getCoreDescriptor().getCloudDescriptor().getReplicaType() != Replica.Type.PULL) {
         singleLeader = true;
       }
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/30e305ef/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
index 95d62d9..e87d9cb 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
@@ -187,7 +187,6 @@ public class TestPullReplica extends SolrCloudTestCase {
     
     Slice s = docCollection.getSlices().iterator().next();
     try (HttpSolrClient leaderClient = getHttpSolrClient(s.getLeader().getCoreUrl())) {
-      leaderClient.commit(); // TODO: this shouldn't be necessary here
       assertEquals(1, leaderClient.query(new SolrQuery("*:*")).getResults().getNumFound());
     }
     
@@ -283,7 +282,7 @@ public class TestPullReplica extends SolrCloudTestCase {
   }
   
   public void testRealTimeGet() throws SolrServerException, IOException, KeeperException, InterruptedException {
-    // should be redirected to Replica.Type.REALTIME
+    // should be redirected to Replica.Type.NRT
     int numReplicas = random().nextBoolean()?1:2;
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, numReplicas, 0, numReplicas)
       .setMaxShardsPerNode(100)
@@ -425,7 +424,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1, 0, 1)
       .setMaxShardsPerNode(100)
       .process(cluster.getSolrClient());
-    cluster.getSolrClient().getZkStateReader().registerCore(collectionName); //TODO: Is this needed? 
+//    cluster.getSolrClient().getZkStateReader().registerCore(collectionName); //TODO: Is this needed? 
     waitForState("Expected collection to be created with 1 shard and 2 replicas", collectionName, clusterShape(1, 2));
     DocCollection docCollection = assertNumberOfReplicas(1, 0, 1, false, true);
     assertEquals(1, docCollection.getSlices().size());