You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2014/04/02 18:34:35 UTC

svn commit: r1584085 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/ solr/core/src/java/org/apache/solr/handler/admin/ solr/core/src/test/org/apache/solr/cloud/

Author: noble
Date: Wed Apr  2 16:34:34 2014
New Revision: 1584085

URL: http://svn.apache.org/r1584085
Log:
SOLR-5859 Harden Overseer restart

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/Overseer.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Wed Apr  2 16:34:34 2014
@@ -71,7 +71,7 @@ public abstract class ElectionContext {
       zkClient.delete(leaderSeqPath, -1, true);
     } catch (NoNodeException e) {
       // fine
-      log.warn("cancelElection did not find election node to remove");
+      log.warn("cancelElection did not find election node to remove",e);
     }
   }
 

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java Wed Apr  2 16:34:34 2014
@@ -67,6 +67,8 @@ public  class LeaderElector {
 
   private volatile ElectionContext context;
 
+  private ElectionWatcher watcher;
+
   public LeaderElector(SolrZkClient zkClient) {
     this.zkClient = zkClient;
     zkCmdExecutor = new ZkCmdExecutor(zkClient.getZkClientTimeout());
@@ -90,7 +92,7 @@ public  class LeaderElector {
     // get all other numbers...
     final String holdElectionPath = context.electionPath + ELECTION_NODE;
     List<String> seqs = zkClient.getChildren(holdElectionPath, null, true);
-    
+
     sortSeqs(seqs);
     List<Integer> intSeqs = getSeqs(seqs);
     if (intSeqs.size() == 0) {
@@ -122,31 +124,7 @@ public  class LeaderElector {
         return;
       }
       try {
-        zkClient.getData(holdElectionPath + "/" + seqs.get(index),
-            new Watcher() {
-              
-              @Override
-              public void process(WatchedEvent event) {
-                // session events are not change events,
-                // and do not remove the watcher
-                if (EventType.None.equals(event.getType())) {
-                  return;
-                }
-                // am I the next leader?
-                try {
-                  checkIfIamLeader(seq, context, true);
-                } catch (InterruptedException e) {
-                  // Restore the interrupted status
-                  Thread.currentThread().interrupt();
-                  log.warn("", e);
-                } catch (IOException e) {
-                  log.warn("", e);
-                } catch (Exception e) {
-                  log.warn("", e);
-                }
-              }
-              
-            }, null, true);
+        zkClient.getData(holdElectionPath + "/" + seqs.get(index), watcher = new ElectionWatcher(context.leaderSeqPath , seq, context) , null, true);
       } catch (KeeperException.SessionExpiredException e) {
         throw e;
       } catch (KeeperException e) {
@@ -290,6 +268,50 @@ public  class LeaderElector {
     
     return seq;
   }
+
+  private class ElectionWatcher implements Watcher {
+    final String leaderSeqPath;
+    final int seq;
+    final ElectionContext context;
+
+    private boolean canceled = false;
+
+    private ElectionWatcher(String leaderSeqPath, int seq, ElectionContext context) {
+      this.leaderSeqPath = leaderSeqPath;
+      this.seq = seq;
+      this.context = context;
+    }
+
+    void cancel(String leaderSeqPath){
+      canceled = true;
+
+    }
+
+    @Override
+    public void process(WatchedEvent event) {
+      // session events are not change events,
+      // and do not remove the watcher
+      if (EventType.None.equals(event.getType())) {
+        return;
+      }
+      if(canceled) {
+        log.info("This watcher is not active anymore {}", leaderSeqPath);
+        return;
+      }
+      try {
+        // am I the next leader?
+        checkIfIamLeader(seq, context, true);
+      } catch (InterruptedException e) {
+        // Restore the interrupted status
+        Thread.currentThread().interrupt();
+        log.warn("", e);
+      } catch (IOException e) {
+        log.warn("", e);
+      } catch (Exception e) {
+        log.warn("", e);
+      }
+    }
+  }
   
   /**
    * Set up any ZooKeeper nodes needed for leader election.
@@ -317,6 +339,8 @@ public  class LeaderElector {
   }
   void retryElection() throws KeeperException, InterruptedException, IOException {
     context.cancelElection();
+    ElectionWatcher watcher = this.watcher;
+    if(watcher!= null) watcher.cancel(context.leaderSeqPath);
     joinElection(context, true);
   }
 }

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/Overseer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/Overseer.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/Overseer.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/Overseer.java Wed Apr  2 16:34:34 2014
@@ -70,6 +70,7 @@ public class Overseer {
   public static final String ADD_ROUTING_RULE = "addroutingrule";
   public static final String REMOVE_ROUTING_RULE = "removeroutingrule";
   public static final String STATE = "state";
+  public static final String QUIT = "quit";
 
   public static final int STATE_UPDATE_DELAY = 1500;  // delay between cloud state updates
   public static final String CREATESHARD = "createshard";
@@ -200,85 +201,131 @@ public class Overseer {
       }
       
       log.info("Starting to work on the main queue");
-      while (!this.isClosed) {
-        isLeader = amILeader();
-        if (LeaderStatus.NO == isLeader) {
-          break;
-        }
-        else if (LeaderStatus.YES != isLeader) {
-          log.debug("am_i_leader unclear {}", isLeader);
-          continue; // not a no, not a yes, try ask again
-        }
-        DistributedQueue.QueueEvent head = null;
-        try {
-          head = stateUpdateQueue.peek(true);
-        } catch (KeeperException e) {
-          if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
-            log.warn(
-                "Solr cannot talk to ZK, exiting Overseer main queue loop", e);
-            return;
+      try {
+        while (!this.isClosed) {
+          isLeader = amILeader();
+          if (LeaderStatus.NO == isLeader) {
+            break;
+          }
+          else if (LeaderStatus.YES != isLeader) {
+            log.debug("am_i_leader unclear {}", isLeader);
+            continue; // not a no, not a yes, try ask again
           }
-          log.error("Exception in Overseer main queue loop", e);
-        } catch (InterruptedException e) {
-          Thread.currentThread().interrupt();
-          return;
-          
-        } catch (Exception e) {
-          log.error("Exception in Overseer main queue loop", e);
-        }
-        synchronized (reader.getUpdateLock()) {
+          DistributedQueue.QueueEvent head = null;
           try {
-            reader.updateClusterState(true);
-            ClusterState clusterState = reader.getClusterState();
-
-            while (head != null) {
-              final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
-              final String operation = message.getStr(QUEUE_OPERATION);
-              final TimerContext timerContext = stats.time(operation);
-              try {
-                clusterState = processMessage(clusterState, message, operation);
-                stats.success(operation);
-              } catch (Exception e) {
-                // generally there is nothing we can do - in most cases, we have
-                // an issue that will fail again on retry or we cannot communicate with
-                // ZooKeeper in which case another Overseer should take over
-                // TODO: if ordering for the message is not important, we could
-                // track retries and put it back on the end of the queue
-                log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e);
-                stats.error(operation);
-              } finally {
-                timerContext.stop();
-              }
-              workQueue.offer(head.getBytes());
-
-              stateUpdateQueue.poll();
-
-              if (System.nanoTime() - lastUpdatedTime > TimeUnit.NANOSECONDS.convert(STATE_UPDATE_DELAY, TimeUnit.MILLISECONDS)) break;
-              
-              // if an event comes in the next 100ms batch it together
-              head = stateUpdateQueue.peek(100); 
-            }
-            lastUpdatedTime = System.nanoTime();
-            zkClient.setData(ZkStateReader.CLUSTER_STATE,
-                ZkStateReader.toJSON(clusterState), true);
-            // clean work queue
-            while (workQueue.poll() != null) ;
-
+            head = stateUpdateQueue.peek(true);
           } catch (KeeperException e) {
             if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
-              log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
+              log.warn(
+                  "Solr cannot talk to ZK, exiting Overseer main queue loop", e);
               return;
             }
             log.error("Exception in Overseer main queue loop", e);
           } catch (InterruptedException e) {
             Thread.currentThread().interrupt();
             return;
-            
+
           } catch (Exception e) {
             log.error("Exception in Overseer main queue loop", e);
           }
+          synchronized (reader.getUpdateLock()) {
+            try {
+              reader.updateClusterState(true);
+              ClusterState clusterState = reader.getClusterState();
+
+              while (head != null) {
+                final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
+                final String operation = message.getStr(QUEUE_OPERATION);
+                final TimerContext timerContext = stats.time(operation);
+                try {
+                  clusterState = processMessage(clusterState, message, operation);
+                  stats.success(operation);
+                } catch (Exception e) {
+                  // generally there is nothing we can do - in most cases, we have
+                  // an issue that will fail again on retry or we cannot communicate with
+                  // ZooKeeper in which case another Overseer should take over
+                  // TODO: if ordering for the message is not important, we could
+                  // track retries and put it back on the end of the queue
+                  log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e);
+                  stats.error(operation);
+                } finally {
+                  timerContext.stop();
+                }
+                workQueue.offer(head.getBytes());
+
+                stateUpdateQueue.poll();
+
+                if (isClosed || System.nanoTime() - lastUpdatedTime > TimeUnit.NANOSECONDS.convert(STATE_UPDATE_DELAY, TimeUnit.MILLISECONDS)) break;
+
+                // if an event comes in the next 100ms batch it together
+                head = stateUpdateQueue.peek(100);
+              }
+              lastUpdatedTime = System.nanoTime();
+              zkClient.setData(ZkStateReader.CLUSTER_STATE,
+                  ZkStateReader.toJSON(clusterState), true);
+              // clean work queue
+              while (workQueue.poll() != null) ;
+
+            } catch (KeeperException e) {
+              if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
+                log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
+                return;
+              }
+              log.error("Exception in Overseer main queue loop", e);
+            } catch (InterruptedException e) {
+              Thread.currentThread().interrupt();
+              return;
+
+            } catch (Exception e) {
+              log.error("Exception in Overseer main queue loop", e);
+            }
+          }
+
         }
-        
+      } finally {
+        new Thread("OverseerExitThread"){
+          //do this in a separate thread because any wait is interrupted in this main thread
+          @Override
+          public void run() {
+            checkIfIamStillLeader();
+          }
+        }.start();
+      }
+    }
+
+    private void checkIfIamStillLeader() {
+      org.apache.zookeeper.data.Stat stat = new org.apache.zookeeper.data.Stat();
+      String path = "/overseer_elect/leader";
+      byte[] data = null;
+      try {
+        data = zkClient.getData(path, null, stat, true);
+      } catch (Exception e) {
+        log.error("could not read the data" ,e);
+        return;
+      }
+      Map m = (Map) ZkStateReader.fromJSON(data);
+      String id = (String) m.get("id");
+      if(overseerCollectionProcessor.getId().equals(id)){
+        try {
+          log.info("I'm exiting , but I'm still the leader");
+          zkClient.delete(path,stat.getVersion(),true);
+        } catch (KeeperException.BadVersionException e) {
+          //no problem ignore it some other Overseer has already taken over
+        } catch (Exception e) {
+          log.error("Could not delete my leader node ", e);
+        } finally {
+          try {
+            if(zkController !=null && !zkController.getCoreContainer().isShutDown()){
+              zkController.rejoinOverseerElection();
+            }
+
+          } catch (Exception e) {
+            log.error("error canceling overseer election election  ",e);
+          }
+        }
+
+      } else{
+        log.info("somebody else has already taken up the overseer position");
       }
     }
 
@@ -324,7 +371,11 @@ public class Overseer {
         clusterState = removeRoutingRule(clusterState, message);
       } else if(CLUSTERPROP.isEqual(operation)){
            handleProp(message);
-      } else {
+      } else if( QUIT.equals(operation)){
+        log.info("################Quit command receive");
+        overseerCollectionProcessor.close();
+        close();
+      } else{
         throw new RuntimeException("unknown operation:" + operation
             + " contents:" + message.getProperties());
       }
@@ -1107,15 +1158,18 @@ public class Overseer {
 
   private String adminPath;
 
-  private OverseerCollectionProcessor ocp;
+  private OverseerCollectionProcessor overseerCollectionProcessor;
+
+  private ZkController zkController;
 
   private Stats stats;
 
   // overseer not responsible for closing reader
-  public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader) throws KeeperException, InterruptedException {
+  public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader, ZkController zkController) throws KeeperException, InterruptedException {
     this.reader = reader;
     this.shardHandler = shardHandler;
     this.adminPath = adminPath;
+    this.zkController = zkController;
     this.stats = new Stats();
   }
   
@@ -1130,8 +1184,8 @@ public class Overseer {
 
     ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
 
-    ocp = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath, stats);
-    ccThread = new OverseerThread(ccTg, ocp, "Overseer-" + id);
+    overseerCollectionProcessor = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath, stats);
+    ccThread = new OverseerThread(ccTg, overseerCollectionProcessor, "Overseer-" + id);
     ccThread.setDaemon(true);
     
     updaterThread.start();

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java Wed Apr  2 16:34:34 2014
@@ -226,6 +226,7 @@ public class OverseerCollectionProcessor
            }
            
            QueueEvent head = workQueue.peek(true);
+           if(isClosed) break;
            final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
 
            final String asyncId = (message.containsKey(ASYNC) && message.get(ASYNC) != null) ? (String) message.get(ASYNC) : null;
@@ -312,15 +313,16 @@ public class OverseerCollectionProcessor
     List<String> availableDesignates = new ArrayList<>();
 
     log.info("sorted nodes {}", nodeNames);//TODO to be removed
-    for (int i = 0; i < nodeNames.size(); i++) {
+    for (int i = 1; i < nodeNames.size(); i++) {
       String s = nodeNames.get(i);
 
       if (overseerDesignates.contains(s)) {
         availableDesignates.add(s);
 
-        for(int j=0;j<i;j++){
-          if(!overseerDesignates.contains(nodeNames.get(j))) {
-            if(!nodesTobePushedBack.contains(nodeNames.get(j))) nodesTobePushedBack.add(nodeNames.get(j));
+        for(int j=1;j<i;j++){
+          String n = nodeNames.get(j);
+          if(!overseerDesignates.contains(n)) {
+            if(!nodesTobePushedBack.contains(n)) nodesTobePushedBack.add(n);
           }
         }
 
@@ -329,8 +331,7 @@ public class OverseerCollectionProcessor
     }
 
     if(!availableDesignates.isEmpty()){
-      for (int i = nodesTobePushedBack.size() - 1; i >= 0; i--) {
-         String s = nodesTobePushedBack.get(i);
+      for (String s : nodesTobePushedBack) {
         log.info("pushing back {} ", s);
         invokeOverseerOp(s, "rejoin");
       }
@@ -373,9 +374,8 @@ public class OverseerCollectionProcessor
     if(leaderNode ==null) return;
     if(!overseerDesignates.contains(leaderNode) && !availableDesignates.isEmpty()){
       //this means there are designated Overseer nodes and I am not one of them , kill myself
-      String newLeader = availableDesignates.get(0);
-      log.info("I am not an overseerdesignate , forcing a new leader {} ", newLeader);
-      invokeOverseerOp(newLeader, "leader");
+      log.info("I am not an overseer designate , forcing myself out {} ", leaderNode);
+      Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(new ZkNodeProps( Overseer.QUEUE_OPERATION, Overseer.QUIT)));
     }
 
   }
@@ -476,6 +476,8 @@ public class OverseerCollectionProcessor
         processRoleCommand(message, operation);
       } else if (ADDREPLICA.isEqual(operation))  {
         addReplica(zkStateReader.getClusterState(), message, results);
+      } else if (REQUESTSTATUS.equals(operation)) {
+        requestStatus(message, results);
       } else if (OVERSEERSTATUS.isEqual(operation)) {
         getOverseerStatus(message, results);
       } else if(LIST.isEqual(operation)) {
@@ -660,6 +662,12 @@ public class OverseerCollectionProcessor
   /**
    * Get collection status from cluster state.
    * Can return collection status by given shard name.
+   *
+   *
+   * @param clusterState
+   * @param name  collection name
+   * @param shardStr comma separated shard names
+   * @return map of collection properties
    */
   private Map<String, Object> getCollectionStatus(Map<String, Object> clusterState, String name, String shardStr) {
     Map<String, Object> docCollection = (Map<String, Object>) clusterState.get(name);
@@ -1497,6 +1505,40 @@ public class OverseerCollectionProcessor
     } while (srsp != null);
   }
 
+  private void requestStatus(ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
+    log.info("Request status invoked");
+    String requestId = message.getStr(REQUESTID);
+
+    // Special taskId (-1), clears up the request state maps.
+    if(requestId.equals("-1")) {
+      completedMap.clear();
+      failureMap.clear();
+      return;
+    }
+
+    if(completedMap.contains(requestId)) {
+      SimpleOrderedMap success = new SimpleOrderedMap();
+      success.add("state", "completed");
+      success.add("msg", "found " + requestId + " in completed tasks");
+      results.add("status", success);
+    } else if (runningMap.contains(requestId)) {
+      SimpleOrderedMap success = new SimpleOrderedMap();
+      success.add("state", "running");
+      success.add("msg", "found " + requestId + " in submitted tasks");
+      results.add("status", success);
+    } else if (failureMap.contains(requestId)) {
+      SimpleOrderedMap success = new SimpleOrderedMap();
+      success.add("state", "failed");
+      success.add("msg", "found " + requestId + " in failed tasks");
+      results.add("status", success);
+    } else {
+      SimpleOrderedMap failure = new SimpleOrderedMap();
+      failure.add("state", "notfound");
+      failure.add("msg", "Did not find taskid [" + requestId + "] in any tasks queue");
+      results.add("status", failure);
+    }
+  }
+
   private void deleteShard(ClusterState clusterState, ZkNodeProps message, NamedList results) {
     log.info("Delete shard invoked");
     String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
@@ -2368,5 +2410,9 @@ public class OverseerCollectionProcessor
       } while (srsp != null);
     } while(true);
   }
+  String getId(){
+    return myId;
+  }
+
 
 }

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkController.java Wed Apr  2 16:34:34 2014
@@ -304,18 +304,6 @@ public final class ZkController {
     return leaderConflictResolveWait;
   }
 
-  public void forceOverSeer(){
-    try {
-      zkClient.delete("/overseer_elect/leader",-1, true);
-      log.info("Forcing me to be leader  {} ", getBaseUrl());
-      overseerElector.getContext().runLeaderProcess(true, Overseer.STATE_UPDATE_DELAY + 100);
-    } catch (Exception e) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, " Error becoming overseer ",e);
-
-    }
-
-  }
-
   private void registerAllCoresAsDown(
       final CurrentCoreDescriptorProvider registerOnReconnect, boolean updateLastPublished) {
     List<CoreDescriptor> descriptors = registerOnReconnect
@@ -558,7 +546,7 @@ public final class ZkController {
       adminPath = cc.getAdminPath();
       
       overseerElector = new LeaderElector(zkClient);
-      this.overseer = new Overseer(shardHandler, adminPath, zkStateReader);
+      this.overseer = new Overseer(shardHandler, adminPath, zkStateReader,this);
       ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
       overseerElector.setup(context);
       overseerElector.joinElection(context, false);
@@ -1679,4 +1667,8 @@ public final class ZkController {
     }
   }
 
+  CoreContainer getCoreContainer(){
+    return cc;
+  }
+
 }

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Wed Apr  2 16:34:34 2014
@@ -273,10 +273,8 @@ public class CoreAdminHandler extends Re
         case OVERSEEROP:{
           ZkController zkController = coreContainer.getZkController();
           if(zkController != null){
-            String op = req.getParams().get("op");
-            if("leader".equals(op)){
-              zkController.forceOverSeer();
-            } else if ("rejoin".equals(op)) zkController.rejoinOverseerElection();
+           String op = req.getParams().get("op");
+           if ("rejoin".equals(op)) zkController.rejoinOverseerElection();
           }
           break;
         }

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java Wed Apr  2 16:34:34 2014
@@ -256,7 +256,7 @@ public class ChaosMonkeyShardSplitTest e
 
     // TODO: close Overseer
     Overseer overseer = new Overseer(
-        new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader);
+        new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader,null);
     overseer.close();
     ElectionContext ec = new OverseerElectionContext(zkClient, overseer,
         address.replaceAll("/", "_"));

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java Wed Apr  2 16:34:34 2014
@@ -40,12 +40,17 @@ import org.apache.solr.client.solrj.Solr
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrServer;
 import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams.CollectionAction;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.zookeeper.data.Stat;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;
+
 @LuceneTestCase.Slow
 @SuppressSSL     // Currently unknown why SSL does not work
 public class OverseerRolesTest  extends AbstractFullDistribZkTestBase{
@@ -86,11 +91,43 @@ public class OverseerRolesTest  extends 
 
   @Override
   public void doTest() throws Exception {
-    addOverseerRole2ExistingNodes();
+    testOverseerRole();
+    testQuitCommand();
+
+  }
+
+  private void testQuitCommand() throws Exception{
+    String collectionName = "testOverseerQuit";
+
+    createCollection(collectionName, client);
+
+    waitForRecoveriesToFinish(collectionName, false);
+
+    SolrZkClient zk = client.getZkStateReader().getZkClient();
+    byte[] data = new byte[0];
+    data = zk.getData("/overseer_elect/leader", null, new Stat(), true);
+    Map m = (Map) ZkStateReader.fromJSON(data);
+    String s = (String) m.get("id");
+    String leader = LeaderElector.getNodeName(s);
+    Overseer.getInQueue(zk).offer(ZkStateReader.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, Overseer.QUIT)));
+    long timeout = System.currentTimeMillis()+5000;
+    String newLeader=null;
+    for(;System.currentTimeMillis() < timeout;){
+      newLeader = OverseerCollectionProcessor.getLeaderNode(zk);
+      if(!newLeader.equals(leader)) break;
+      Thread.sleep(100);
+    }
+    assertNotSame( "Leader not changed yet",newLeader,leader);
 
+
+
+    assertTrue("The old leader should have rejoined election ", OverseerCollectionProcessor.getSortedOverseerNodeNames(zk).contains(leader));
   }
 
-  private void addOverseerRole2ExistingNodes() throws Exception {
+
+
+
+  private void testOverseerRole() throws Exception {
     String collectionName = "testOverseerCol";
 
     createCollection(collectionName, client);
@@ -203,13 +240,6 @@ public class OverseerRolesTest  extends 
 
     assertTrue("New overseer not the frontrunner : "+ getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) + " expected : "+ killedOverseer, leaderchanged);
 
-
-
-
-
-    client.shutdown();
-
-
   }
 
   private void setOverseerRole(CollectionAction action, String overseerDesignate) throws Exception, IOException {

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java?rev=1584085&r1=1584084&r2=1584085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java Wed Apr  2 16:34:34 2014
@@ -992,7 +992,7 @@ public class OverseerTest extends SolrTe
       overseers.get(overseers.size() -1).getZkStateReader().getZkClient().close();
     }
     Overseer overseer = new Overseer(
-        new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader);
+        new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader,null);
     overseers.add(overseer);
     ElectionContext ec = new OverseerElectionContext(zkClient, overseer,
         address.replaceAll("/", "_"));