You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2017/03/14 10:39:14 UTC

lucene-solr:branch_6x: SOLR-10279: The autoAddReplica feature can result in SolrCores being assigned new shards when using legacyCloud=false and will also fail on a state check when taking over a core registration with a new core.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 464722a0a -> 5983f9e70


SOLR-10279: The autoAddReplica feature can result in SolrCores being assigned new shards when using legacyCloud=false and will also fail on a state check when taking over a core registration with a new core.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5983f9e7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5983f9e7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5983f9e7

Branch: refs/heads/branch_6x
Commit: 5983f9e702a9dbd1ba9c5f383c8cc310d00fde91
Parents: 464722a
Author: markrmiller <ma...@apache.org>
Authored: Tue Mar 14 06:01:06 2017 -0400
Committer: markrmiller <ma...@apache.org>
Committed: Tue Mar 14 06:36:23 2017 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  4 +++
 .../OverseerAutoReplicaFailoverThread.java      |  6 ++--
 .../org/apache/solr/cloud/ZkController.java     |  8 +-----
 .../cloud/SharedFSAutoReplicaFailoverTest.java  | 29 ++++++++++++++++++++
 4 files changed, 38 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5983f9e7/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a71a4ba..6add95d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -172,6 +172,10 @@ Bug Fixes
 
 * SOLR-10269: MetricsHandler JSON output incorrect. (ab)
 
+* SOLR-10279: The autoAddReplica feature can result in SolrCores being assigned new shards when using
+  legacyCloud=false and will also fail on a state check when taking over a core registration with a new
+  core. (Mark Miller, Hrishikesh Gadre, Patrick Dvorack)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5983f9e7/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
index 10b4bf3..ea09eef 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
@@ -243,13 +243,14 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
     final String dataDir = badReplica.replica.getStr("dataDir");
     final String ulogDir = badReplica.replica.getStr("ulogDir");
     final String coreNodeName = badReplica.replica.getName();
+    final String shardId = badReplica.slice.getName();
     if (dataDir != null) {
       // need an async request - full shard goes down leader election
       final String coreName = badReplica.replica.getStr(ZkStateReader.CORE_NAME_PROP);
       log.debug("submit call to {}", createUrl);
       MDC.put("OverseerAutoReplicaFailoverThread.createUrl", createUrl);
       try {
-        updateExecutor.submit(() -> createSolrCore(collection, createUrl, dataDir, ulogDir, coreNodeName, coreName));
+        updateExecutor.submit(() -> createSolrCore(collection, createUrl, dataDir, ulogDir, coreNodeName, coreName, shardId));
       } finally {
         MDC.remove("OverseerAutoReplicaFailoverThread.createUrl");
       }
@@ -440,7 +441,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
 
   private boolean createSolrCore(final String collection,
       final String createUrl, final String dataDir, final String ulogDir,
-      final String coreNodeName, final String coreName) {
+      final String coreNodeName, final String coreName, final String shardId) {
 
     try (HttpSolrClient client = new HttpSolrClient.Builder(createUrl).build()) {
       log.debug("create url={}", createUrl);
@@ -451,6 +452,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
       createCmd.setCoreNodeName(coreNodeName);
       // TODO: how do we ensure unique coreName
       // for now, the collections API will use unique names
+      createCmd.setShardId(shardId);
       createCmd.setCoreName(coreName);
       createCmd.setDataDir(dataDir);
       createCmd.setUlogDir(ulogDir.substring(0, ulogDir.length() - "/tlog".length()));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5983f9e7/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c083736..37d673c 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1424,13 +1424,7 @@ public class ZkController {
             errorMessage.set("coreNodeName " + coreNodeName + " does not exist in shard " + cloudDesc.getShardId());
             return false;
           }
-          String baseUrl = replica.getStr(BASE_URL_PROP);
-          String coreName = replica.getStr(CORE_NAME_PROP);
-          if (baseUrl.equals(this.baseURL) && coreName.equals(cd.getName())) {
-            return true;
-          }
-          errorMessage.set("coreNodeName " + coreNodeName + " exists, but does not match expected node or core name");
-          return false;
+          return true;
         });
       } catch (TimeoutException e) {
         String error = errorMessage.get();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5983f9e7/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverTest.java b/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverTest.java
index 18503e7..9c345fd 100644
--- a/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/SharedFSAutoReplicaFailoverTest.java
@@ -38,6 +38,7 @@ import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest.Create;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
@@ -103,6 +104,11 @@ public class SharedFSAutoReplicaFailoverTest extends AbstractFullDistribZkTestBa
   public void setUp() throws Exception {
     super.setUp();
     collectionUlogDirMap.clear();
+    if (random().nextBoolean()) {
+      CollectionAdminRequest.setClusterProperty("legacyCloud", "false").process(cloudClient);
+    } else {
+      CollectionAdminRequest.setClusterProperty("legacyCloud", "true").process(cloudClient);
+    }
   }
   
   @Override
@@ -313,6 +319,29 @@ public class SharedFSAutoReplicaFailoverTest extends AbstractFullDistribZkTestBa
     assertSliceAndReplicaCount(collection1);
 
     assertUlogDir(collections);
+    
+    // restart all to test core saved state
+    
+    ChaosMonkey.stop(jettys);
+    ChaosMonkey.stop(controlJetty);
+
+    assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
+
+    ChaosMonkey.start(jettys);
+    ChaosMonkey.start(controlJetty);
+
+    assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), collection1, 120000));
+    
+    assertSliceAndReplicaCount(collection1);
+
+    assertUlogDir(collections);
+    
+    assertSliceAndReplicaCount(collection1);
+    assertSingleReplicationAndShardSize(collection3, 5);
+
+    // all docs should be queried
+    assertSingleReplicationAndShardSize(collection4, 5);
+    queryAndAssertResultSize(collection4, numDocs, 10000);
   }
 
   private void queryAndAssertResultSize(String collection, int expectedResultSize, int timeoutMS)