You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/09/12 09:14:11 UTC

svn commit: r1522463 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/handler/admin/ core/src/test/org/apache/solr/cloud/

Author: shalin
Date: Thu Sep 12 07:14:10 2013
New Revision: 1522463

URL: http://svn.apache.org/r1522463
Log:
SOLR-5233: The deleteshard collections API doesn't wait for the cluster state to update, can fail if some nodes of the deleted shard are down, and has incorrect logging.

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/Overseer.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1522463&r1=1522462&r2=1522463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Sep 12 07:14:10 2013
@@ -233,6 +233,10 @@ Bug Fixes
   documents in the same index segment had a value of true.
   (Robert Muir, hossman, yonik)
 
+* SOLR-5233: The "deleteshard" collections API doesn't wait for cluster state to update,
+  can fail if some nodes of the deleted shard were down and had incorrect logging.
+  (Christine Poerschke, shalin)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/Overseer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/Overseer.java?rev=1522463&r1=1522462&r2=1522463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/Overseer.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/Overseer.java Thu Sep 12 07:14:10 2013
@@ -598,10 +598,11 @@ public class Overseer {
      * Remove collection slice from cloudstate
      */
     private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
-
       final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
       final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
 
+      log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
+
       final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
       DocCollection coll = newCollections.get(collection);
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java?rev=1522463&r1=1522462&r2=1522463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java Thu Sep 12 07:14:10 2013
@@ -820,7 +820,8 @@ public class OverseerCollectionProcessor
       } while (srsp != null);
 
       ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-          Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection);
+          Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection,
+          ZkStateReader.SHARD_ID_PROP, sliceId);
       Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
 
       // wait for a while until we don't see the shard
@@ -829,7 +830,7 @@ public class OverseerCollectionProcessor
       boolean removed = false;
       while (System.currentTimeMillis() < timeout) {
         Thread.sleep(100);
-        removed = zkStateReader.getClusterState().getSlice(collection, message.getStr("name")) == null;
+        removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
         if (removed) {
           Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
           break;
@@ -837,15 +838,15 @@ public class OverseerCollectionProcessor
       }
       if (!removed) {
         throw new SolrException(ErrorCode.SERVER_ERROR,
-            "Could not fully remove collection: " + collection + " shard: " + message.getStr("name"));
+            "Could not fully remove collection: " + collection + " shard: " + sliceId);
       }
 
-      log.info("Successfully deleted collection " + collection + ", shard: " + message.getStr("name"));
+      log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
 
     } catch (SolrException e) {
       throw e;
     } catch (Exception e) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + message.getStr("name"), e);
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
     }
   }
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java?rev=1522463&r1=1522462&r2=1522463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java Thu Sep 12 07:14:10 2013
@@ -329,11 +329,11 @@ public class CollectionsHandler extends 
   private void handleDeleteShardAction(SolrQueryRequest req,
       SolrQueryResponse rsp) throws InterruptedException, KeeperException {
     log.info("Deleting Shard : " + req.getParamString());
-    String name = req.getParams().required().get("collection");
-    String shard = req.getParams().required().get("shard");
+    String name = req.getParams().required().get(ZkStateReader.COLLECTION_PROP);
+    String shard = req.getParams().required().get(ZkStateReader.SHARD_ID_PROP);
     
     Map<String,Object> props = new HashMap<String,Object>();
-    props.put("collection", name);
+    props.put(ZkStateReader.COLLECTION_PROP, name);
     props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD);
     props.put(ZkStateReader.SHARD_ID_PROP, shard);
 

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java?rev=1522463&r1=1522462&r2=1522463&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java Thu Sep 12 07:14:10 2013
@@ -101,18 +101,18 @@ public class DeleteShardTest extends Abs
 
     deleteShard(SHARD1);
 
-    confirmShardDeletion();
+    confirmShardDeletion(SHARD1);
   }
 
-  protected void confirmShardDeletion() throws SolrServerException, KeeperException,
+  protected void confirmShardDeletion(String shard) throws SolrServerException, KeeperException,
       InterruptedException {
     ZkStateReader zkStateReader = cloudClient.getZkStateReader();
-    ClusterState clusterState = null;
+    ClusterState clusterState = zkStateReader.getClusterState();
     int counter = 10;
     while (counter-- > 0) {
       zkStateReader.updateClusterState(true);
       clusterState = zkStateReader.getClusterState();
-      if (clusterState.getSlice("collection1", SHARD1) == null) {
+      if (clusterState.getSlice("collection1", shard) == null) {
         break;
       }
       Thread.sleep(1000);