You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/11/22 22:25:46 UTC

svn commit: r1205182 - in /lucene/dev/branches/solrcloud/solr: core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/core/ core/src/java/org/apache/solr/handler/ core/src/test/org/apache/solr/cloud/ solrj/src/java/org/apache/solr/common/cl...

Author: markrmiller
Date: Tue Nov 22 21:25:44 2011
New Revision: 1205182

URL: http://svn.apache.org/viewvc?rev=1205182&view=rev
Log:
don't do recovery on core reload

Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java Tue Nov 22 21:25:44 2011
@@ -40,6 +40,7 @@ import org.apache.solr.common.cloud.ZkSt
 import org.apache.solr.common.cloud.ZooKeeperException;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.ReplicationHandler;
@@ -156,8 +157,7 @@ public final class ZkController {
                   .getCurrentDescriptors();
               if (descriptors != null) {
                 for (CoreDescriptor descriptor : descriptors) {
-                  register(descriptor.getName(), descriptor,
-                      descriptor.getCloudDescriptor());
+                  register(descriptor.getName(), descriptor);
                 }
               }
 
@@ -409,12 +409,13 @@ public final class ZkController {
    * @return
    * @throws Exception 
    */
-  public String register(String coreName, final CoreDescriptor desc, final CloudDescriptor cloudDesc) throws Exception {
+  public String register(String coreName, final CoreDescriptor desc) throws Exception {
     // nocommit: TODO: on core reload we don't want to do recovery or anything...
     
     String shardUrl = localHostName + ":" + localHostPort + "/" + localHostContext
         + "/" + coreName;
     
+    CloudDescriptor cloudDesc = desc.getCloudDescriptor();
     final String collection = cloudDesc.getCollectionName();
     
     byte[] data = zkClient.getData(ZkStateReader.CLUSTER_STATE,
@@ -424,17 +425,17 @@ public final class ZkController {
     CloudState state = CloudState.load(data);
     String shardZkNodeName = getNodeName() + "_" + coreName;
     
-    boolean doRecovery = checkRecovery(cloudDesc, state, shardZkNodeName);
-    
+    // checkRecovery will have updated the shardId if it already exists...
     String shardId = cloudDesc.getShardId();
-    if (shardId == null && !doRecovery) {
+
+    if (shardId == null && getShardId(desc, state, shardZkNodeName)) {
       shardId = assignShard.assignShard(collection, numShards);
       cloudDesc.setShardId(shardId);
     }
     
     if (log.isInfoEnabled()) {
         log.info("Register shard - core:" + coreName + " address:"
-            + shardUrl);
+            + shardUrl + "shardId:" + shardId);
       }
     
     leaderElector.setupForSlice(shardId, collection);
@@ -449,31 +450,46 @@ public final class ZkController {
     System.out.println("leader url: "+ leaderUrl);
     System.out.println("shard url: "+ shardUrl);
     boolean iamleader = false;
+    boolean doRecovery = true;
     if (leaderUrl.equals(shardUrl)) {
       iamleader = true;
+      // TODO: this should really be figured in checkRecovery
+      doRecovery = false;
     } else {
-      // we are not the leader, so catch up with recovery
-      doRecovery = true;
+      CoreContainer cc = desc.getCoreContainer();
+      if (cc != null) {
+        SolrCore core = cc.getCore(desc.getName());
+        try {
+          if (core.isReloaded()) {
+            doRecovery = false;
+          }
+        } finally {
+          core.close();
+        }
+      } else {
+        log.warn("Cannot recover without access to CoreConatiner");
+        return shardId;
+      }
+
     }
     
     if (doRecovery) {
-      if (desc.getCoreContainer() != null) {
-        doRecovery(collection, desc, cloudDesc, iamleader);
-      } else {
-        log.warn("For some odd reason a SolrCore is trying to recover but does not have access to a CoreContainer - skipping recovery.");
-      }
+      doRecovery(collection, desc, cloudDesc, iamleader);
+    } else {
+      System.out.println("dont do recovery");
     }
-    addToZk(collection, desc, cloudDesc, shardUrl, shardZkNodeName, "active");
+    addToZk(collection, desc, cloudDesc, shardUrl, shardZkNodeName, ZkStateReader.ACTIVE);
 
     return shardId;
   }
 
 
-  private boolean checkRecovery(final CloudDescriptor cloudDesc,
+  private boolean getShardId(final CoreDescriptor desc,
       CloudState state, String shardZkNodeName) {
-    boolean recover = false;
-    Map<String,Slice> slices = state.getSlices(cloudDesc.getCollectionName());
 
+    CloudDescriptor cloudDesc = desc.getCloudDescriptor();
+    
+    Map<String,Slice> slices = state.getSlices(cloudDesc.getCollectionName());
     if (slices != null) {
       Map<String,String> nodes = new HashMap<String,String>();
 
@@ -485,10 +501,10 @@ public final class ZkController {
       if (nodes.containsKey(shardZkNodeName)) {
         // TODO: we where already registered - go into recovery mode
         cloudDesc.setShardId(nodes.get(shardZkNodeName));
-        recover = true;
+        return false;
       }
     }
-    return recover;
+    return true;
   }
 
 
@@ -568,15 +584,16 @@ public final class ZkController {
   private void doRecovery(String collection, final CoreDescriptor desc,
       final CloudDescriptor cloudDesc, boolean iamleader) throws Exception,
       SolrServerException, IOException {
-    // nocommit: joke code
     System.out.println("do recovery");
+    
     // start buffer updates to tran log
     // and do recovery - either replay via realtime get 
     // or full index replication
 
     // seems perhaps we cannot do this here since we are not fully running - 
-    // we need to trigger a recovery that happens later
+    // we may need to trigger a recovery that happens later
     System.out.println("shard is:" + cloudDesc.getShardId());
+    System.out.println("leader:" + iamleader);
     
     String leaderUrl = zkStateReader.getLeader(collection, cloudDesc.getShardId());
     
@@ -588,9 +605,9 @@ public final class ZkController {
       
       
       // if we want to buffer updates while recovering, this
-      // will have to trigger later - http is not yet up
+      // will have to trigger later - http is not yet up ???
       
-      // use rep handler and SnapPuller directly, so we can do this sync rather than async
+      // use rep handler directly, so we can do this sync rather than async
       SolrCore core = desc.getCoreContainer().getCore(desc.getName());
       try {
         ReplicationHandler replicationHandler = (ReplicationHandler) core

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java Tue Nov 22 21:25:44 2011
@@ -529,7 +529,7 @@ public class CoreContainer 
   private void registerInZk(SolrCore core) {
     if (zkController != null) {
       try {
-        zkController.register(core.getName(), core.getCoreDescriptor(), core.getCoreDescriptor().getCloudDescriptor());
+        zkController.register(core.getName(), core.getCoreDescriptor());
       } catch (InterruptedException e) {
         // Restore the interrupted status
         Thread.currentThread().interrupt();

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java Tue Nov 22 21:25:44 2011
@@ -48,7 +48,6 @@ import org.apache.solr.util.plugin.Named
 import org.apache.solr.util.plugin.SolrCoreAware;
 import org.apache.solr.util.plugin.PluginInfoInitialized;
 import org.apache.commons.io.IOUtils;
-import org.eclipse.jdt.core.dom.ThisExpression;
 import org.xml.sax.SAXException;
 
 import javax.xml.parsers.ParserConfigurationException;
@@ -75,6 +74,8 @@ public final class SolrCore implements S
   private String logid; // used to show what name is set
   private final CoreDescriptor coreDescriptor;
 
+  private boolean isReloaded = false;
+
   private final SolrConfig solrConfig;
   private final SolrResourceLoader resourceLoader;
   private final IndexSchema schema;
@@ -562,6 +563,7 @@ public final class SolrCore implements S
       initDirectoryFactory();
     } else {
       directoryFactory = updateHandler.getSolrCoreState().getDirectoryFactory();
+      this.isReloaded = true;
     }
     
     initIndex();
@@ -1390,6 +1392,9 @@ public final class SolrCore implements S
     return holder;
   }
 
+  public boolean isReloaded() {
+    return isReloaded;
+  }
 
   // Take control of newSearcherHolder (which should have a reference count of at
   // least 1 already.  If the caller wishes to use the newSearcherHolder directly

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java Tue Nov 22 21:25:44 2011
@@ -301,7 +301,7 @@ public class SnapPuller {
       boolean deleteTmpIdxDir = true;
       File indexDir = null ;
       try {
-        indexDir = new File(core.getIndexDir());
+        indexDir = new File(core.getNewIndexDir());
         downloadIndexFiles(isFullCopyNeeded, tmpIndexDir, latestVersion);
         LOG.info("Total time taken for download : " + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
         Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
@@ -547,7 +547,9 @@ public class SnapPuller {
    */
   private void downloadIndexFiles(boolean downloadCompleteIndex, File tmpIdxDir, long latestVersion) throws Exception {
     for (Map<String, Object> file : filesToDownload) {
-      File localIndexFile = new File(solrCore.getIndexDir(), (String) file.get(NAME));
+      File localIndexFile = new File(solrCore.getNewIndexDir(), (String) file.get(NAME));
+      System.out.println("look at file:" + localIndexFile);
+      System.out.println("exits" + localIndexFile.exists());
       if (!localIndexFile.exists() || downloadCompleteIndex) {
         fileFetcher = new FileFetcher(tmpIdxDir, file, (String) file.get(NAME), false, latestVersion);
         currentFile = file;
@@ -567,7 +569,7 @@ public class SnapPuller {
    */
   private boolean isIndexStale() {
     for (Map<String, Object> file : filesToDownload) {
-      File localIndexFile = new File(solrCore.getIndexDir(), (String) file
+      File localIndexFile = new File(solrCore.getNewIndexDir(), (String) file
               .get(NAME));
       if (localIndexFile.exists()
               && localIndexFile.length() != (Long) file.get(SIZE)) {

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java Tue Nov 22 21:25:44 2011
@@ -528,6 +528,7 @@ public class FullDistributedZkTest exten
     System.out.println("shard2_2 port:" + ((CommonsHttpSolrServer)s2c.get(1)).getBaseURL());
     
 
+    //assertDocCounts();
     // if we properly recovered, we should now have the couple missing docs that
     // came in while shard was down
     assertEquals(s2c.get(0).query(new SolrQuery("*:*")).getResults()
@@ -550,9 +551,10 @@ public class FullDistributedZkTest exten
     for (SolrServer client : shardToClient.get("shard1")) {
       System.out.println("total:" + client.query(new SolrQuery("*:*")).getResults().getNumFound());
     }
-    
+    Thread.sleep(5000);
     // assert the new server has the same number of docs as another server in
     // that shard
+    // TODO: make a new call that checks each shard in slice has equal docs
     assertEquals(shardToClient.get("shard1").get(0).query(new SolrQuery("*:*"))
         .getResults().getNumFound(),
         shardToClient.get("shard1").get(shardToClient.get("shard1").size() - 1)

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java Tue Nov 22 21:25:44 2011
@@ -20,11 +20,8 @@ package org.apache.solr.cloud;
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.cloud.CloudState;
-import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
@@ -32,7 +29,6 @@ import org.apache.solr.core.CoreDescript
 import org.apache.solr.core.SolrConfig;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
-
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -177,19 +173,33 @@ public class ZkControllerTest extends So
       
       CloudDescriptor cloudDesc = new CloudDescriptor();
       cloudDesc.setCollectionName("collection1");
+      
+      
       zkController.createCollectionZkNode(cloudDesc);
      
-      String shard1 = zkController.register("core1", new CoreDescriptor(null, "core1", "instanceDir"), cloudDesc);
-      cloudDesc.setShardId(null);
-      String shard2 = zkController.register("core2", new CoreDescriptor(null, "core2", "instanceDir"), cloudDesc);
-      cloudDesc.setShardId(null);
-      String shard3 = zkController.register("core3", new CoreDescriptor(null, "core3", "instanceDir"), cloudDesc);
-      cloudDesc.setShardId(null);
-      String shard4 = zkController.register("core4", new CoreDescriptor(null, "core4", "instanceDir"), cloudDesc);
-      cloudDesc.setShardId(null);
-      String shard5 = zkController.register("core5", new CoreDescriptor(null, "core5", "instanceDir"), cloudDesc);
-      cloudDesc.setShardId(null);
-      String shard6 = zkController.register("core6", new CoreDescriptor(null, "core6", "instanceDir"), cloudDesc);
+      CoreDescriptor desc = new CoreDescriptor(null, "core1", "");
+      desc.setCloudDescriptor(cloudDesc);
+      String shard1 = zkController.register("core1", desc);
+      cloudDesc.setShardId(null);
+      desc = new CoreDescriptor(null, "core2", "");
+      desc.setCloudDescriptor(cloudDesc);
+      String shard2 = zkController.register("core2", desc);
+      cloudDesc.setShardId(null);
+      desc = new CoreDescriptor(null, "core3", "");
+      desc.setCloudDescriptor(cloudDesc);
+      String shard3 = zkController.register("core3", desc);
+      cloudDesc.setShardId(null);
+      desc = new CoreDescriptor(null, "core4", "");
+      desc.setCloudDescriptor(cloudDesc);
+      String shard4 = zkController.register("core4", desc);
+      cloudDesc.setShardId(null);
+      desc = new CoreDescriptor(null, "core5", "");
+      desc.setCloudDescriptor(cloudDesc);
+      String shard5 = zkController.register("core5", desc);
+      cloudDesc.setShardId(null);
+      desc = new CoreDescriptor(null, "core6", "");
+      desc.setCloudDescriptor(cloudDesc);
+      String shard6 = zkController.register("core6", desc);
       cloudDesc.setShardId(null);
 
       assertEquals("shard1", shard1);

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java Tue Nov 22 21:25:44 2011
@@ -78,11 +78,8 @@ public class CloudState {
 			collectionStates.put(collection, new HashMap<String, Slice>());
 		}
 		if (!collectionStates.get(collection).containsKey(slice.getName())) {
-			log.info("New slice: " + slice.getName());
 			collectionStates.get(collection).put(slice.getName(), slice);
 		} else {
-			log.info("Updating existing slice");
-			
 			Map<String, ZkNodeProps> shards = new HashMap<String, ZkNodeProps>();
 			
 			Slice existingSlice = collectionStates.get(collection).get(slice.getName());

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Tue Nov 22 21:25:44 2011
@@ -45,6 +45,7 @@ public class ZkStateReader {
   public static final String CLUSTER_STATE = "/clusterstate.xml";
 
   public static final String RECOVERING = "recovering";
+  public static final String ACTIVE = "active";
   
   private volatile CloudState cloudState = new CloudState();