You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/11/22 22:25:46 UTC
svn commit: r1205182 - in /lucene/dev/branches/solrcloud/solr:
core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/core/
core/src/java/org/apache/solr/handler/ core/src/test/org/apache/solr/cloud/
solrj/src/java/org/apache/solr/common/cloud/
Author: markrmiller
Date: Tue Nov 22 21:25:44 2011
New Revision: 1205182
URL: http://svn.apache.org/viewvc?rev=1205182&view=rev
Log:
Don't do recovery on core reload
Modified:
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java
lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java
lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java Tue Nov 22 21:25:44 2011
@@ -40,6 +40,7 @@ import org.apache.solr.common.cloud.ZkSt
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.ReplicationHandler;
@@ -156,8 +157,7 @@ public final class ZkController {
.getCurrentDescriptors();
if (descriptors != null) {
for (CoreDescriptor descriptor : descriptors) {
- register(descriptor.getName(), descriptor,
- descriptor.getCloudDescriptor());
+ register(descriptor.getName(), descriptor);
}
}
@@ -409,12 +409,13 @@ public final class ZkController {
* @return
* @throws Exception
*/
- public String register(String coreName, final CoreDescriptor desc, final CloudDescriptor cloudDesc) throws Exception {
+ public String register(String coreName, final CoreDescriptor desc) throws Exception {
// nocommit: TODO: on core reload we don't want to do recovery or anything...
String shardUrl = localHostName + ":" + localHostPort + "/" + localHostContext
+ "/" + coreName;
+ CloudDescriptor cloudDesc = desc.getCloudDescriptor();
final String collection = cloudDesc.getCollectionName();
byte[] data = zkClient.getData(ZkStateReader.CLUSTER_STATE,
@@ -424,17 +425,17 @@ public final class ZkController {
CloudState state = CloudState.load(data);
String shardZkNodeName = getNodeName() + "_" + coreName;
- boolean doRecovery = checkRecovery(cloudDesc, state, shardZkNodeName);
-
+ // checkRecovery will have updated the shardId if it already exists...
String shardId = cloudDesc.getShardId();
- if (shardId == null && !doRecovery) {
+
+ if (shardId == null && getShardId(desc, state, shardZkNodeName)) {
shardId = assignShard.assignShard(collection, numShards);
cloudDesc.setShardId(shardId);
}
if (log.isInfoEnabled()) {
log.info("Register shard - core:" + coreName + " address:"
- + shardUrl);
+ + shardUrl + "shardId:" + shardId);
}
leaderElector.setupForSlice(shardId, collection);
@@ -449,31 +450,46 @@ public final class ZkController {
System.out.println("leader url: "+ leaderUrl);
System.out.println("shard url: "+ shardUrl);
boolean iamleader = false;
+ boolean doRecovery = true;
if (leaderUrl.equals(shardUrl)) {
iamleader = true;
+ // TODO: this should really be figured in checkRecovery
+ doRecovery = false;
} else {
- // we are not the leader, so catch up with recovery
- doRecovery = true;
+ CoreContainer cc = desc.getCoreContainer();
+ if (cc != null) {
+ SolrCore core = cc.getCore(desc.getName());
+ try {
+ if (core.isReloaded()) {
+ doRecovery = false;
+ }
+ } finally {
+ core.close();
+ }
+ } else {
+ log.warn("Cannot recover without access to CoreConatiner");
+ return shardId;
+ }
+
}
if (doRecovery) {
- if (desc.getCoreContainer() != null) {
- doRecovery(collection, desc, cloudDesc, iamleader);
- } else {
- log.warn("For some odd reason a SolrCore is trying to recover but does not have access to a CoreContainer - skipping recovery.");
- }
+ doRecovery(collection, desc, cloudDesc, iamleader);
+ } else {
+ System.out.println("dont do recovery");
}
- addToZk(collection, desc, cloudDesc, shardUrl, shardZkNodeName, "active");
+ addToZk(collection, desc, cloudDesc, shardUrl, shardZkNodeName, ZkStateReader.ACTIVE);
return shardId;
}
- private boolean checkRecovery(final CloudDescriptor cloudDesc,
+ private boolean getShardId(final CoreDescriptor desc,
CloudState state, String shardZkNodeName) {
- boolean recover = false;
- Map<String,Slice> slices = state.getSlices(cloudDesc.getCollectionName());
+ CloudDescriptor cloudDesc = desc.getCloudDescriptor();
+
+ Map<String,Slice> slices = state.getSlices(cloudDesc.getCollectionName());
if (slices != null) {
Map<String,String> nodes = new HashMap<String,String>();
@@ -485,10 +501,10 @@ public final class ZkController {
if (nodes.containsKey(shardZkNodeName)) {
// TODO: we where already registered - go into recovery mode
cloudDesc.setShardId(nodes.get(shardZkNodeName));
- recover = true;
+ return false;
}
}
- return recover;
+ return true;
}
@@ -568,15 +584,16 @@ public final class ZkController {
private void doRecovery(String collection, final CoreDescriptor desc,
final CloudDescriptor cloudDesc, boolean iamleader) throws Exception,
SolrServerException, IOException {
- // nocommit: joke code
System.out.println("do recovery");
+
// start buffer updates to tran log
// and do recovery - either replay via realtime get
// or full index replication
// seems perhaps we cannot do this here since we are not fully running -
- // we need to trigger a recovery that happens later
+ // we may need to trigger a recovery that happens later
System.out.println("shard is:" + cloudDesc.getShardId());
+ System.out.println("leader:" + iamleader);
String leaderUrl = zkStateReader.getLeader(collection, cloudDesc.getShardId());
@@ -588,9 +605,9 @@ public final class ZkController {
// if we want to buffer updates while recovering, this
- // will have to trigger later - http is not yet up
+ // will have to trigger later - http is not yet up ???
- // use rep handler and SnapPuller directly, so we can do this sync rather than async
+ // use rep handler directly, so we can do this sync rather than async
SolrCore core = desc.getCoreContainer().getCore(desc.getName());
try {
ReplicationHandler replicationHandler = (ReplicationHandler) core
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java Tue Nov 22 21:25:44 2011
@@ -529,7 +529,7 @@ public class CoreContainer
private void registerInZk(SolrCore core) {
if (zkController != null) {
try {
- zkController.register(core.getName(), core.getCoreDescriptor(), core.getCoreDescriptor().getCloudDescriptor());
+ zkController.register(core.getName(), core.getCoreDescriptor());
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/SolrCore.java Tue Nov 22 21:25:44 2011
@@ -48,7 +48,6 @@ import org.apache.solr.util.plugin.Named
import org.apache.solr.util.plugin.SolrCoreAware;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.commons.io.IOUtils;
-import org.eclipse.jdt.core.dom.ThisExpression;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
@@ -75,6 +74,8 @@ public final class SolrCore implements S
private String logid; // used to show what name is set
private final CoreDescriptor coreDescriptor;
+ private boolean isReloaded = false;
+
private final SolrConfig solrConfig;
private final SolrResourceLoader resourceLoader;
private final IndexSchema schema;
@@ -562,6 +563,7 @@ public final class SolrCore implements S
initDirectoryFactory();
} else {
directoryFactory = updateHandler.getSolrCoreState().getDirectoryFactory();
+ this.isReloaded = true;
}
initIndex();
@@ -1390,6 +1392,9 @@ public final class SolrCore implements S
return holder;
}
+ public boolean isReloaded() {
+ return isReloaded;
+ }
// Take control of newSearcherHolder (which should have a reference count of at
// least 1 already. If the caller wishes to use the newSearcherHolder directly
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java Tue Nov 22 21:25:44 2011
@@ -301,7 +301,7 @@ public class SnapPuller {
boolean deleteTmpIdxDir = true;
File indexDir = null ;
try {
- indexDir = new File(core.getIndexDir());
+ indexDir = new File(core.getNewIndexDir());
downloadIndexFiles(isFullCopyNeeded, tmpIndexDir, latestVersion);
LOG.info("Total time taken for download : " + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
@@ -547,7 +547,9 @@ public class SnapPuller {
*/
private void downloadIndexFiles(boolean downloadCompleteIndex, File tmpIdxDir, long latestVersion) throws Exception {
for (Map<String, Object> file : filesToDownload) {
- File localIndexFile = new File(solrCore.getIndexDir(), (String) file.get(NAME));
+ File localIndexFile = new File(solrCore.getNewIndexDir(), (String) file.get(NAME));
+ System.out.println("look at file:" + localIndexFile);
+ System.out.println("exits" + localIndexFile.exists());
if (!localIndexFile.exists() || downloadCompleteIndex) {
fileFetcher = new FileFetcher(tmpIdxDir, file, (String) file.get(NAME), false, latestVersion);
currentFile = file;
@@ -567,7 +569,7 @@ public class SnapPuller {
*/
private boolean isIndexStale() {
for (Map<String, Object> file : filesToDownload) {
- File localIndexFile = new File(solrCore.getIndexDir(), (String) file
+ File localIndexFile = new File(solrCore.getNewIndexDir(), (String) file
.get(NAME));
if (localIndexFile.exists()
&& localIndexFile.length() != (Long) file.get(SIZE)) {
Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullDistributedZkTest.java Tue Nov 22 21:25:44 2011
@@ -528,6 +528,7 @@ public class FullDistributedZkTest exten
System.out.println("shard2_2 port:" + ((CommonsHttpSolrServer)s2c.get(1)).getBaseURL());
+ //assertDocCounts();
// if we properly recovered, we should now have the couple missing docs that
// came in while shard was down
assertEquals(s2c.get(0).query(new SolrQuery("*:*")).getResults()
@@ -550,9 +551,10 @@ public class FullDistributedZkTest exten
for (SolrServer client : shardToClient.get("shard1")) {
System.out.println("total:" + client.query(new SolrQuery("*:*")).getResults().getNumFound());
}
-
+ Thread.sleep(5000);
// assert the new server has the same number of docs as another server in
// that shard
+ // TODO: make a new call that checks each shard in slice has equal docs
assertEquals(shardToClient.get("shard1").get(0).query(new SolrQuery("*:*"))
.getResults().getNumFound(),
shardToClient.get("shard1").get(shardToClient.get("shard1").size() - 1)
Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java Tue Nov 22 21:25:44 2011
@@ -20,11 +20,8 @@ package org.apache.solr.cloud;
import java.io.File;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.cloud.CloudState;
-import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
@@ -32,7 +29,6 @@ import org.apache.solr.core.CoreDescript
import org.apache.solr.core.SolrConfig;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
-
import org.junit.BeforeClass;
import org.junit.Test;
@@ -177,19 +173,33 @@ public class ZkControllerTest extends So
CloudDescriptor cloudDesc = new CloudDescriptor();
cloudDesc.setCollectionName("collection1");
+
+
zkController.createCollectionZkNode(cloudDesc);
- String shard1 = zkController.register("core1", new CoreDescriptor(null, "core1", "instanceDir"), cloudDesc);
- cloudDesc.setShardId(null);
- String shard2 = zkController.register("core2", new CoreDescriptor(null, "core2", "instanceDir"), cloudDesc);
- cloudDesc.setShardId(null);
- String shard3 = zkController.register("core3", new CoreDescriptor(null, "core3", "instanceDir"), cloudDesc);
- cloudDesc.setShardId(null);
- String shard4 = zkController.register("core4", new CoreDescriptor(null, "core4", "instanceDir"), cloudDesc);
- cloudDesc.setShardId(null);
- String shard5 = zkController.register("core5", new CoreDescriptor(null, "core5", "instanceDir"), cloudDesc);
- cloudDesc.setShardId(null);
- String shard6 = zkController.register("core6", new CoreDescriptor(null, "core6", "instanceDir"), cloudDesc);
+ CoreDescriptor desc = new CoreDescriptor(null, "core1", "");
+ desc.setCloudDescriptor(cloudDesc);
+ String shard1 = zkController.register("core1", desc);
+ cloudDesc.setShardId(null);
+ desc = new CoreDescriptor(null, "core2", "");
+ desc.setCloudDescriptor(cloudDesc);
+ String shard2 = zkController.register("core2", desc);
+ cloudDesc.setShardId(null);
+ desc = new CoreDescriptor(null, "core3", "");
+ desc.setCloudDescriptor(cloudDesc);
+ String shard3 = zkController.register("core3", desc);
+ cloudDesc.setShardId(null);
+ desc = new CoreDescriptor(null, "core4", "");
+ desc.setCloudDescriptor(cloudDesc);
+ String shard4 = zkController.register("core4", desc);
+ cloudDesc.setShardId(null);
+ desc = new CoreDescriptor(null, "core5", "");
+ desc.setCloudDescriptor(cloudDesc);
+ String shard5 = zkController.register("core5", desc);
+ cloudDesc.setShardId(null);
+ desc = new CoreDescriptor(null, "core6", "");
+ desc.setCloudDescriptor(cloudDesc);
+ String shard6 = zkController.register("core6", desc);
cloudDesc.setShardId(null);
assertEquals("shard1", shard1);
Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/CloudState.java Tue Nov 22 21:25:44 2011
@@ -78,11 +78,8 @@ public class CloudState {
collectionStates.put(collection, new HashMap<String, Slice>());
}
if (!collectionStates.get(collection).containsKey(slice.getName())) {
- log.info("New slice: " + slice.getName());
collectionStates.get(collection).put(slice.getName(), slice);
} else {
- log.info("Updating existing slice");
-
Map<String, ZkNodeProps> shards = new HashMap<String, ZkNodeProps>();
Slice existingSlice = collectionStates.get(collection).get(slice.getName());
Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java?rev=1205182&r1=1205181&r2=1205182&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Tue Nov 22 21:25:44 2011
@@ -45,6 +45,7 @@ public class ZkStateReader {
public static final String CLUSTER_STATE = "/clusterstate.xml";
public static final String RECOVERING = "recovering";
+ public static final String ACTIVE = "active";
private volatile CloudState cloudState = new CloudState();