You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dr...@apache.org on 2016/04/20 02:57:47 UTC

lucene-solr:branch_6x: SOLR-8973: Zookeeper frenzy when a core is first created.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x d68b090dd -> 7c356bad0


SOLR-8973: Zookeeper frenzy when a core is first created.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7c356bad
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7c356bad
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7c356bad

Branch: refs/heads/branch_6x
Commit: 7c356bad06418d95c5394a7d7d5bf5e54cbf39bb
Parents: d68b090
Author: Scott Blum <dr...@apache.org>
Authored: Tue Apr 19 19:39:55 2016 -0400
Committer: Scott Blum <dr...@apache.org>
Committed: Tue Apr 19 20:57:42 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../org/apache/solr/cloud/ZkController.java     | 12 ++--
 .../solr/cloud/overseer/ZkStateReaderTest.java  | 60 ++++++++++++++++++++
 .../apache/solr/common/cloud/ZkStateReader.java | 36 ++++++------
 4 files changed, 89 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c356bad/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 6e451c1..d94b463 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -152,6 +152,8 @@ Other Changes
 
 * SOLR-9015: Adds SelectStream as a default function in the StreamHandler (Dennis Gove)
 
+* SOLR-8973: Zookeeper frenzy when a core is first created. (Janmejay Singh, Scott Blum, shalin)
+
 ==================  6.0.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c356bad/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index ab9422d..bdd6a62 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1478,11 +1478,13 @@ public final class ZkController {
       }
 
       publish(cd, Replica.State.DOWN, false, true);
-      DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(cd.getCloudDescriptor().getCollectionName());
-      if (collection != null) {
-        log.info("Registering watch for collection {}", cd.getCloudDescriptor().getCollectionName());
-        zkStateReader.addCollectionWatch(cd.getCloudDescriptor().getCollectionName());
-      }
+      String collectionName = cd.getCloudDescriptor().getCollectionName();
+      DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
+      log.info(collection == null ?
+              "Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" :
+              "Registering watch for collection {}",
+          collectionName);
+      zkStateReader.addCollectionWatch(collectionName);
     } catch (KeeperException e) {
       log.error("", e);
       throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c356bad/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
index 4b60fc6..47d094c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java
@@ -26,6 +26,7 @@ import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.OverseerTest;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.ZkTestServer;
+import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.DocRouter;
 import org.apache.solr.common.cloud.Slice;
@@ -180,4 +181,63 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
       server.shutdown();
     }
   }
+
+  public void testWatchedCollectionCreation() throws Exception {
+    String zkDir = createTempDir("testWatchedCollectionCreation").toFile().getAbsolutePath();
+
+    ZkTestServer server = new ZkTestServer(zkDir);
+
+    SolrZkClient zkClient = null;
+
+    try {
+      server.run();
+      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
+      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+
+      zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
+      ZkController.createClusterZkNodes(zkClient);
+
+      ZkStateReader reader = new ZkStateReader(zkClient);
+      reader.createClusterStateWatchersAndUpdate();
+      reader.addCollectionWatch("c1");
+
+      // Initially there should be no c1 collection.
+      assertNull(reader.getClusterState().getCollectionRef("c1"));
+
+      zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true);
+      reader.forceUpdateCollection("c1");
+
+      // Still no c1 collection, despite a collection path.
+      assertNull(reader.getClusterState().getCollectionRef("c1"));
+
+      ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());
+
+
+      // create new collection with stateFormat = 2
+      DocCollection state = new DocCollection("c1", new HashMap<String, Slice>(), new HashMap<String, Object>(), DocRouter.DEFAULT, 0, ZkStateReader.CLUSTER_STATE + "/c1/state.json");
+      ZkWriteCommand wc = new ZkWriteCommand("c1", state);
+      writer.enqueueUpdate(reader.getClusterState(), wc, null);
+      writer.writePendingUpdates();
+
+      assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true));
+
+      //reader.forceUpdateCollection("c1");
+
+      for (int i = 0; i < 100; ++i) {
+        Thread.sleep(50);
+        ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1");
+        if (ref != null) {
+          break;
+        }
+      }
+      ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1");
+      assertNotNull(ref);
+      assertFalse(ref.isLazilyLoaded());
+      assertEquals(2, ref.get().getStateFormat());
+    } finally {
+      IOUtils.close(zkClient);
+      server.shutdown();
+
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c356bad/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index e8d95c3..c6f88c0 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -443,13 +443,6 @@ public class ZkStateReader implements Closeable {
     // To move a collection's state to format2, first create the new state2 format node, then remove legacy entry.
     Map<String, ClusterState.CollectionRef> result = new LinkedHashMap<>(legacyCollectionStates);
 
-    // Are there any interesting collections that disappeared from the legacy cluster state?
-    for (String coll : interestingCollections) {
-      if (!result.containsKey(coll) && !watchedCollectionStates.containsKey(coll)) {
-        new StateWatcher(coll).refreshAndWatch(true);
-      }
-    }
-  
     // Add state format2 collections, but don't override legacy collection states.
     for (Map.Entry<String, DocCollection> entry : watchedCollectionStates.entrySet()) {
       result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(entry.getValue()));
@@ -1048,15 +1041,26 @@ public class ZkStateReader implements Closeable {
 
   private DocCollection fetchCollectionState(String coll, Watcher watcher) throws KeeperException, InterruptedException {
     String collectionPath = getCollectionPath(coll);
-    try {
-      Stat stat = new Stat();
-      byte[] data = zkClient.getData(collectionPath, watcher, stat, true);
-      ClusterState state = ClusterState.load(stat.getVersion(), data,
-              Collections.<String>emptySet(), collectionPath);
-      ClusterState.CollectionRef collectionRef = state.getCollectionStates().get(coll);
-      return collectionRef == null ? null : collectionRef.get();
-    } catch (KeeperException.NoNodeException e) {
-      return null;
+    while (true) {
+      try {
+        Stat stat = new Stat();
+        byte[] data = zkClient.getData(collectionPath, watcher, stat, true);
+        ClusterState state = ClusterState.load(stat.getVersion(), data,
+            Collections.<String>emptySet(), collectionPath);
+        ClusterState.CollectionRef collectionRef = state.getCollectionStates().get(coll);
+        return collectionRef == null ? null : collectionRef.get();
+      } catch (KeeperException.NoNodeException e) {
+        if (watcher != null) {
+          // Leave an exists watch in place in case a state.json is created later.
+          Stat exists = zkClient.exists(collectionPath, watcher, true);
+          if (exists != null) {
+            // Rare race condition, we tried to fetch the data and couldn't find it, then we found it exists.
+            // Loop and try again.
+            continue;
+          }
+        }
+        return null;
+      }
     }
   }