You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/04/14 07:37:48 UTC
[7/7] lucene-solr:solr-5750: Enhance restore: * Restored conf name is
configurable;
won't overwrite existing. Defaults to original. * Customize replicationFactor
and some other settings on restoration. * Shard/slice info is restored
instead of reconstitu
Enhance restore:
* Restored conf name is configurable; won't overwrite existing. Defaults to original.
* Customize replicationFactor and some other settings on restoration.
* Shard/slice info is restored instead of reconstituted. Thus shard hash ranges (from e.g. shard split) is restored.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e8d29686
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e8d29686
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e8d29686
Branch: refs/heads/solr-5750
Commit: e8d296864c1fdab023624c12f7350a7a79c50ce8
Parents: 7e80651
Author: David Smiley <ds...@apache.org>
Authored: Thu Apr 14 01:37:33 2016 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Thu Apr 14 01:37:33 2016 -0400
----------------------------------------------------------------------
.../cloud/OverseerCollectionMessageHandler.java | 213 +++++++++----------
.../solr/cloud/TestCloudBackupRestore.java | 165 ++++++++++----
2 files changed, 220 insertions(+), 158 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8d29686/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index 8a35b6d..1832ead 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -16,24 +16,19 @@
*/
package org.apache.solr.cloud;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.text.SimpleDateFormat;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -52,7 +47,6 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;
@@ -85,12 +79,10 @@ import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
-import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
-import org.apache.solr.handler.SnapShooter;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest;
@@ -111,9 +103,9 @@ import static org.apache.solr.common.cloud.DocCollection.SNITCH;
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.ELECTION_NODE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
-import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_VALUE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REJOIN_AT_HEAD_PROP;
@@ -333,86 +325,90 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
//TODO move this out into it's own class.
private void processRestoreAction(ZkNodeProps message, NamedList results) throws IOException, KeeperException, InterruptedException {
+ // TODO maybe we can inherit createCollection's options/code
String restoreCollectionName = message.getStr(COLLECTION_PROP);
- String name = message.getStr(NAME);
+ String backupName = message.getStr(NAME); // of backup
String location = message.getStr("location");
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
- final String asyncId = message.getStr(ASYNC);
+ String asyncId = message.getStr(ASYNC);
Map<String, String> requestMap = new HashMap<>();
- Path backupPath = Paths.get(location).resolve(name).toAbsolutePath();
-
+ Path backupPath = Paths.get(location).resolve(backupName).toAbsolutePath();
+ if (!Files.exists(backupPath)) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Couldn't restore since doesn't exist: " + backupPath);
+ }
Path backupZkPath = backupPath.resolve("zk_backup");
- Path backupZkPropsPath = backupZkPath.resolve("backup.properties");
-
Properties properties = new Properties();
- try (InputStream in = Files.newInputStream(backupZkPropsPath)) {
- properties.load(new InputStreamReader(in, StandardCharsets.UTF_8));
- } catch (IOException e) {
- String errorMsg = String.format(Locale.ROOT, "Could not load properties from %s: %s:",
- backupZkPropsPath.toAbsolutePath().toString(), e.toString());
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, errorMsg);
+ try (Reader in = Files.newBufferedReader(backupZkPath.resolve("backup.properties"), StandardCharsets.UTF_8)) {
+ properties.load(in);
}
- String backupCollection = (String) properties.get("collectionName");
- Path collectionStatePath = backupZkPath.resolve("collection_state_backup.json");
- byte[] data = Files.readAllBytes(collectionStatePath);
- @SuppressWarnings("unchecked")
- Map<String, Object> collectionProps = (Map<String, Object>) ((Map<String, Object>) Utils.fromJSON(data)).get(backupCollection);
-
- //Download the configs
- String configName = (String) properties.get("collection.configName");
+ String backupCollection = (String) properties.get("collection");
+ byte[] data = Files.readAllBytes(backupZkPath.resolve("collection_state_backup.json"));
+ ClusterState backupClusterState = ClusterState.load(-1, data, Collections.emptySet());
+ DocCollection backupCollectionState = backupClusterState.getCollection(backupCollection);
- //Use a name such as restore.<restore_name>.<original_config_name>
- // in ZK for the configs
- String restoreConfigName = "restore." + configName;
- zkStateReader.getConfigManager().uploadConfigDir(backupZkPath.resolve("configs").resolve(configName), restoreConfigName);
+ //Upload the configs
+ String configName = (String) properties.get(COLL_CONF);
+ String restoreConfigName = message.getStr(COLL_CONF, configName);
+ if (zkStateReader.getConfigManager().configExists(restoreConfigName)) {
+ log.info("Using existing config {}", restoreConfigName);
+ //TODO add overwrite option?
+ } else {
+ log.info("Uploading config {}", restoreConfigName);
+ zkStateReader.getConfigManager().uploadConfigDir(backupZkPath.resolve("configs").resolve(configName), restoreConfigName);
+ }
- log.debug("Starting restore into collection={} with backup_name={} at location={}", restoreCollectionName, name,
+ log.info("Starting restore into collection={} with backup_name={} at location={}", restoreCollectionName, backupName,
backupPath);
//Create core-less collection
{
Map<String, Object> propMap = new HashMap<>();
+ propMap.put(Overseer.QUEUE_OPERATION, CREATE.toString());
+ propMap.put("fromApi", "true"); // mostly true. Prevents autoCreated=true in the collection state.
+
+ // inherit settings from input API, defaulting to the backup's setting. Ex: replicationFactor
+ for (String collProp : COLL_PROPS.keySet()) {
+ Object val = message.getProperties().getOrDefault(collProp, backupCollectionState.get(collProp));
+ if (val != null) {
+ propMap.put(collProp, val);
+ }
+ }
+
propMap.put(NAME, restoreCollectionName);
propMap.put(CREATE_NODE_SET, CREATE_NODE_SET_EMPTY); //no cores
- propMap.put("collection.configName", restoreConfigName);
- // add async param
- if (asyncId != null) {
- propMap.put(ASYNC, asyncId);
+ propMap.put(COLL_CONF, restoreConfigName);
+
+ // router.*
+ @SuppressWarnings("unchecked")
+ Map<String, Object> routerProps = (Map<String, Object>) backupCollectionState.getProperties().get(DocCollection.DOC_ROUTER);
+ for (Map.Entry<String, Object> pair : routerProps.entrySet()) {
+ propMap.put(DocCollection.DOC_ROUTER + "." + pair.getKey(), pair.getValue());
}
- String router = (String) ((Map)collectionProps.get("router")).get("name");
- propMap.put("router.name", router);
- Map slices = (Map) collectionProps.get(SHARDS_PROP);
- if (slices != null) { //Implicit routers may not have a shards defined
- propMap.put(NUM_SLICES, slices.size());
- }
- if (ImplicitDocRouter.NAME.equals(router)) {
- Iterator keys = ((Map) collectionProps.get(SHARDS_PROP)).keySet().iterator();
- StringBuilder shardsBuilder = new StringBuilder();
- while (keys.hasNext()) {
- String shard = (String) keys.next();
- shardsBuilder.append(shard);
- shardsBuilder.append(",");
+
+ Set<String> sliceNames = backupCollectionState.getActiveSlicesMap().keySet();
+ if (backupCollectionState.getRouter() instanceof ImplicitDocRouter) {
+ propMap.put(SHARDS_PROP, StrUtils.join(sliceNames, ','));
+ } else {
+ propMap.put(NUM_SLICES, sliceNames.size());
+ // ClusterStateMutator.createCollection detects that "slices" is in fact a slice structure instead of a
+ // list of names, and if so uses this instead of building it. We clear the replica list.
+ Collection<Slice> backupSlices = backupCollectionState.getActiveSlices();
+ Map<String,Slice> newSlices = new LinkedHashMap<>(backupSlices.size());
+ for (Slice backupSlice : backupSlices) {
+ newSlices.put(backupSlice.getName(),
+ new Slice(backupSlice.getName(), Collections.emptyMap(), backupSlice.getProperties()));
}
- String shards = shardsBuilder.deleteCharAt(shardsBuilder.length()-1).toString();//delete trailing comma
- propMap.put(SHARDS_PROP, shards);
+ propMap.put(SHARDS_PROP, newSlices);
}
- // TODO nocommit loop all from a list and blacklist those we know could never work.
- // The user could always edit the properties file if they need to.
- propMap.put(MAX_SHARDS_PER_NODE, Integer.parseInt((String) collectionProps.get(MAX_SHARDS_PER_NODE)));
- propMap.put(ZkStateReader.AUTO_ADD_REPLICAS, Boolean.parseBoolean((String) collectionProps.get(ZkStateReader.AUTO_ADD_REPLICAS)));
- propMap.put(Overseer.QUEUE_OPERATION, CREATE.toString());
createCollection(zkStateReader.getClusterState(), new ZkNodeProps(propMap), new NamedList());
+ // note: when createCollection() returns, the collection exists (no race)
}
- //No need to wait. CreateCollection takes care of it by calling waitToSeeReplicasInState()
DocCollection restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
- if (restoreCollection == null) {
- throw new SolrException(ErrorCode.SERVER_ERROR, "Could not create restore collection");
- }
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
@@ -427,18 +423,22 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
}
+ // TODO how do we leverage the CREATE_NODE_SET / RULE / SNITCH logic in createCollection?
+
ClusterState clusterState = zkStateReader.getClusterState();
//Create one replica per shard and copy backed up data to it
for (Slice slice: restoreCollection.getSlices()) {
log.debug("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection);
HashMap<String, Object> propMap = new HashMap<>();
- propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
+ propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD);
propMap.put(COLLECTION_PROP, restoreCollectionName);
propMap.put(SHARD_ID_PROP, slice.getName());
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
+ addPropertyParams(message, propMap);
+
addReplica(clusterState, new ZkNodeProps(propMap), new NamedList());
}
@@ -456,7 +456,6 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap);
//Mark all shards in ACTIVE STATE
-
{
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
@@ -470,18 +469,11 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
//refresh the location copy of collection state
restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
- //Update the replicationFactor to be 1 as that's what it is currently. Otherwise addreplica assigns wrong core names
- {
- HashMap<String, Object> propMap = new HashMap<>();
- propMap.put(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower());
- propMap.put(COLLECTION_PROP, restoreCollectionName);
- propMap.put(REPLICATION_FACTOR, 1);
- inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
- }
-
//Add the remaining replicas for each shard
- int numReplicas = Integer.parseInt((String) collectionProps.get(REPLICATION_FACTOR));
- if (numReplicas > 1) {
+ Integer numReplicas = restoreCollection.getReplicationFactor();
+ if (numReplicas != null && numReplicas > 1) {
+ log.info("Adding replicas to restored collection={}", restoreCollection);
+
for (Slice slice: restoreCollection.getSlices()) {
for(int i=1; i<numReplicas; i++) {
log.debug("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection);
@@ -492,19 +484,14 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
+ addPropertyParams(message, propMap);
+
addReplica(zkStateReader.getClusterState(), new ZkNodeProps(propMap), results);
}
}
-
- //Update the replicationFactor property in cluster state for this collection
- log.info("Modifying replication factor to the expected value of={}", numReplicas);
- HashMap<String, Object>propMap = new HashMap<>();
- propMap.put(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower());
- propMap.put(COLLECTION_PROP, restoreCollectionName);
- propMap.put(REPLICATION_FACTOR, numReplicas);
- inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
- //nocommit do we need to wait for the result to be done before returning?
}
+
+ log.info("Completed restoring collection={} backup={}", restoreCollection, backupName);
}
private void processBackupAction(ZkNodeProps message, NamedList results) throws IOException, KeeperException, InterruptedException {
@@ -513,11 +500,10 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
String location = message.getStr("location");
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
- Map<String, String> requestMap = null;
- if (asyncId != null) {
- requestMap = new HashMap<>();
- }
+ Map<String, String> requestMap = new HashMap<>();
+ Instant startTime = Instant.now();
+ // note: we assume a shared files system to backup a collection, since a collection is distributed
Path backupPath = Paths.get(location).resolve(name).toAbsolutePath();
//Validating if the directory already exists.
@@ -525,20 +511,21 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Backup directory already exists: " + backupPath);
}
+ Files.createDirectory(backupPath); // create now
- log.debug("Starting backup of collection={} with backup_name={} at location={}", collectionName, name,
+ log.info("Starting backup of collection={} with backup_name={} at location={}", collectionName, name,
backupPath);
- for (Slice slice : zkStateReader.getClusterState().getActiveSlices(collectionName)) {
+ for (Slice slice : zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
Replica replica = slice.getLeader();
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(CoreAdminParams.ACTION, CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
- params.set("location", backupPath.toString());
+ params.set("location", backupPath.toString()); // note: index dir will be here then the "snapshot." + slice name
params.set(CORE_NAME_PROP, coreName);
- params.set(CoreAdminParams.ACTION, CoreAdminAction.BACKUPCORE.toString());
sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
log.debug("Sent backup request to core={} for backup_name={}", coreName, name);
@@ -547,7 +534,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
- log.debug("Starting to backup ZK data for backup_name={}", name);
+ log.info("Starting to backup ZK data for backup_name={}", name);
//Download the configs
String configName = zkStateReader.readConfigName(collectionName);
@@ -556,32 +543,27 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
//Save the collection's state. Can be part of the monolithic clusterstate.json or a individual state.json
//Since we don't want to distinguish we extract the state and back it up as a separate json
- Path collectionStatePath = zkBackup.resolve("collection_state_backup.json");
DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
- byte[] bytes = Utils.toJSON(Collections.singletonMap(collectionName, collection));
- Files.write(collectionStatePath, bytes);
+ Files.write(zkBackup.resolve("collection_state_backup.json"),//nocommit or simply clusterstate.json?
+ Utils.toJSON(Collections.singletonMap(collectionName, collection)));
+ //nocommit why is it stored in zk_backup; shouldn't it be in backupPath?
Path propertiesPath = zkBackup.resolve("backup.properties");
Properties properties = new Properties();
- properties.put("collectionName", collectionName);
- properties.put("snapshotName", name);
+
+ properties.put("snapshotName", name); //nocommit or simply "backupName" (consistent with "backup" term?)
+ properties.put("collection", collectionName);
properties.put("collection.configName", configName);
- properties.put("stateFormat", Integer.toString(collection.getStateFormat()));
- properties.put("startTime", new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date()));
+ properties.put("startTime", startTime.toString());
//TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same.
- //if they are not the same then we can throw and error or have a 'overwriteConfig' flag
+ //if they are not the same then we can throw an error or have an 'overwriteConfig' flag
//TODO save numDocs for the shardLeader. We can use it to sanity check the restore.
- try (OutputStreamWriter os = new OutputStreamWriter(
- new FileOutputStream(propertiesPath.toAbsolutePath().toString()), StandardCharsets.UTF_8)) {
+ try (Writer os = Files.newBufferedWriter(propertiesPath, StandardCharsets.UTF_8)) {
properties.store(os, "Snapshot properties file");
- } catch (IOException e) {
- String errorMsg = String.format(Locale.ROOT, "Could not write properties to %s: %s:",
- propertiesPath.toAbsolutePath().toString(), e.toString());
- throw new SolrException(ErrorCode.SERVER_ERROR, errorMsg);
}
- log.debug("Completed backing up ZK data for backup={}", name);
+ log.info("Completed backing up ZK data for backup={}", name);
}
@SuppressWarnings("unchecked")
@@ -2033,6 +2015,15 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
}
}
+ private void addPropertyParams(ZkNodeProps message, Map<String,Object> map) {
+ // Now add the property.key=value pairs
+ for (String key : message.keySet()) {
+ if (key.startsWith(COLL_PROP_PREFIX)) {
+ map.put(key, message.getStr(key));
+ }
+ }
+ }
+
private static List<String> getLiveOrLiveAndCreateNodeSetList(final Set<String> liveNodes, final ZkNodeProps message, final Random random) {
// TODO: add smarter options that look at the current number of cores per
// node?
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8d29686/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java
index 7dbefda..f0817e2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java
@@ -17,13 +17,24 @@
package org.apache.solr.cloud;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+import java.util.TreeMap;
+
import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ImplicitDocRouter;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -36,68 +47,101 @@ public class TestCloudBackupRestore extends SolrCloudTestCase {
private static Logger log = LoggerFactory.getLogger(TestCloudBackupRestore.class);
- private static final int NUM_SHARDS = 2;
+ private static final int NUM_SHARDS = 2;//granted we sometimes shard split to get more
+
+ private static long docsSeed; // see indexDocs()
@BeforeClass
public static void createCluster() throws Exception {
- configureCluster(2)
+ configureCluster(2)// nodes
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.configure();
+
+ docsSeed = random().nextLong();
}
@Test
public void test() throws Exception {
String collectionName = "backuprestore";
- String restoreCollectionName = collectionName + "_restored";
boolean isImplicit = random().nextBoolean();
- int numReplicas = TestUtil.nextInt(random(), 1, 2);
+ int replFactor = TestUtil.nextInt(random(), 1, 2);
CollectionAdminRequest.Create create =
- CollectionAdminRequest.createCollection(collectionName, "conf1", NUM_SHARDS, numReplicas);
- create.setMaxShardsPerNode(NUM_SHARDS);
+ CollectionAdminRequest.createCollection(collectionName, "conf1", NUM_SHARDS, replFactor);
+ if (NUM_SHARDS * replFactor > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
+ create.setMaxShardsPerNode(NUM_SHARDS);//just to assert it survives the restoration
+ }
+ if (random().nextBoolean()) {
+ create.setAutoAddReplicas(true);//just to assert it survives the restoration
+ }
+ Properties coreProps = new Properties();
+ coreProps.put("customKey", "customValue");//just to assert it survives the restoration
+ create.setProperties(coreProps);
if (isImplicit) { //implicit router
create.setRouterName(ImplicitDocRouter.NAME);
- create.setNumShards(null);//erase it
+ create.setNumShards(null);//erase it. TODO suggest a new createCollectionWithImplicitRouter method
create.setShards("shard1,shard2"); // however still same number as NUM_SHARDS; we assume this later
create.setRouterField("shard_s");
+ } else {//composite id router
+ if (random().nextBoolean()) {
+ create.setRouterField("shard_s");
+ }
}
-//TODO nocommit test shard split & custom doc route?
+
create.process(cluster.getSolrClient());
waitForCollection(collectionName);
indexDocs(collectionName);
- testBackupAndRestore(collectionName, restoreCollectionName, isImplicit);
+
+ if (!isImplicit && random().nextBoolean()) {
+ // shard split the first shard
+ int prevActiveSliceCount = getActiveSliceCount(collectionName);
+ CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
+ splitShard.setShardName("shard1");
+ splitShard.process(cluster.getSolrClient());
+ // wait until we see one more active slice...
+ for (int i = 0; getActiveSliceCount(collectionName) != prevActiveSliceCount + 1; i++) {
+ assertTrue(i < 30);
+ Thread.sleep(500);
+ }
+ // issue a hard commit. Split shard does a soft commit which isn't good enough for the backup/snapshooter to see
+ cluster.getSolrClient().commit();
+ }
+
+ testBackupAndRestore(collectionName);
+ }
+
+ private int getActiveSliceCount(String collectionName) {
+ return cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(collectionName).getActiveSlices().size();
}
private void indexDocs(String collectionName) throws Exception {
- int numDocs = TestUtil.nextInt(random(), 10, 100);
+ Random random = new Random(docsSeed);// use a constant seed for the whole test run so that we can easily re-index.
+ int numDocs = random.nextInt(100);
+ if (numDocs == 0) {
+ log.info("Indexing ZERO test docs");
+ return;
+ }
CloudSolrClient client = cluster.getSolrClient();
client.setDefaultCollection(collectionName);
+ List<SolrInputDocument> docs = new ArrayList<>(numDocs);
for (int i=0; i<numDocs; i++) {
- //We index the shard_s fields for whichever router gets chosen but only use it when implicit router was selected
- if (random().nextBoolean()) {
- SolrInputDocument doc = new SolrInputDocument();
- doc.addField("id", i);
- doc.addField("shard_s", "shard1");
- client.add(doc);
- } else {
- SolrInputDocument doc = new SolrInputDocument();
- doc.addField("id", i);
- doc.addField("shard_s", "shard2");
- client.add(doc);
- }
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField("id", i);
+ doc.addField("shard_s", "shard" + (1 + random.nextInt(NUM_SHARDS))); // for implicit router
+ docs.add(doc);
}
+ client.add(docs);// batch
client.commit();
}
- private void testBackupAndRestore(String collectionName, String restoreCollectionName, boolean isImplicit) throws Exception {
+ private void testBackupAndRestore(String collectionName) throws Exception {
String backupName = "mytestbackup";
+ String restoreCollectionName = collectionName + "_restored";
+
CloudSolrClient client = cluster.getSolrClient();
- long totalDocs = client.query(collectionName, new SolrQuery("*:*")).getResults().getNumFound();
- long shard1Docs = 0, shard2Docs = 0;
- if (isImplicit) {
- shard1Docs = client.query(collectionName, new SolrQuery("*:*").setParam(_ROUTE_, "shard1")).getResults().getNumFound();
- shard2Docs = client.query(collectionName, new SolrQuery("*:*").setParam(_ROUTE_, "shard2")).getResults().getNumFound();
- assertTrue(totalDocs == shard1Docs + shard2Docs);
- }
+ DocCollection backupCollection = client.getZkStateReader().getClusterState().getCollection(collectionName);
+
+ Map<String, Integer> origShardToDocCount = getShardToDocCountMap(client, backupCollection);
+ assert origShardToDocCount.isEmpty() == false;
String location = createTempDir().toFile().getAbsolutePath();
@@ -110,36 +154,63 @@ public class TestCloudBackupRestore extends SolrCloudTestCase {
log.info("Triggering Restore command");
+ //nocommit test with async
CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
.setLocation(location);
- rsp = cluster.getSolrClient().request(restore);
+ if (origShardToDocCount.size() > cluster.getJettySolrRunners().size()) {
+ // may need to increase maxShardsPerNode (e.g. if it was shard split, then now we need more)
+ restore.getCreateOptions().setMaxShardsPerNode(origShardToDocCount.size());
+ }
+ Properties props = new Properties();
+ props.setProperty("customKey", "customVal");
+ restore.getCreateOptions().setProperties(props);
+ boolean sameConfig = random().nextBoolean();
+ if (sameConfig==false) {
+ restore.getCreateOptions().setConfigName("customConfigName");//nocommit ugh, this is deprecated
+ }
+ rsp = cluster.getSolrClient().request(restore); // DO IT!
assertEquals(0, ((NamedList)rsp.get("responseHeader")).get("status"));
waitForCollection(restoreCollectionName);
//Check the number of results are the same
- long restoredNumDocs = client.query(restoreCollectionName, new SolrQuery("*:*")).getResults().getNumFound();
- assertEquals(totalDocs, restoredNumDocs);
-
- if (isImplicit) {
- long restoredShard1Docs = client.query(restoreCollectionName, new SolrQuery("*:*").setParam(_ROUTE_, "shard1")).getResults().getNumFound();
- long restoredShard2Docs = client.query(restoreCollectionName, new SolrQuery("*:*").setParam(_ROUTE_, "shard2")).getResults().getNumFound();
-
- assertEquals(shard2Docs, restoredShard2Docs);
- assertEquals(shard1Docs, restoredShard1Docs);
- }
-
- DocCollection backupCollection = client.getZkStateReader().getClusterState().getCollection(collectionName);
DocCollection restoreCollection = client.getZkStateReader().getClusterState().getCollection(restoreCollectionName);
+ assertEquals(origShardToDocCount, getShardToDocCountMap(client, restoreCollection));
+ //Re-index same docs (should be identical docs given same random seed) and test we have the same result. Helps
+ // test we reconstituted the hash ranges / doc router.
+ if (!(restoreCollection.getRouter() instanceof ImplicitDocRouter) && random().nextBoolean()) {
+ indexDocs(restoreCollectionName);
+ assertEquals(origShardToDocCount, getShardToDocCountMap(client, restoreCollection));
+ }
assertEquals(backupCollection.getReplicationFactor(), restoreCollection.getReplicationFactor());
+ assertEquals(backupCollection.getAutoAddReplicas(), restoreCollection.getAutoAddReplicas());
+ assertEquals(backupCollection.getActiveSlices().iterator().next().getReplicas().size(),
+ restoreCollection.getActiveSlices().iterator().next().getReplicas().size());
+ assertEquals(sameConfig ? "conf1" : "customConfigName",
+ cluster.getSolrClient().getZkStateReader().readConfigName(restoreCollectionName));
+
+ // assert added core properties:
+ // nocommit how?
+ }
- assertEquals("restore.conf1", cluster.getSolrClient().getZkStateReader().readConfigName(restoreCollectionName));
+ private Map<String, Integer> getShardToDocCountMap(CloudSolrClient client, DocCollection docCollection) throws SolrServerException, IOException {
+ Map<String,Integer> shardToDocCount = new TreeMap<>();
+ for (Slice slice : docCollection.getActiveSlices()) {
+ String shardName = slice.getName();
+ long docsInShard = client.query(docCollection.getName(), new SolrQuery("*:*").setParam(_ROUTE_, shardName))
+ .getResults().getNumFound();
+ shardToDocCount.put(shardName, (int) docsInShard);
+ }
+ return shardToDocCount;
}
public void waitForCollection(String collection) throws Exception {
- AbstractFullDistribZkTestBase.waitForCollection(cluster.getSolrClient().getZkStateReader(), collection, NUM_SHARDS);
- AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, cluster.getSolrClient().getZkStateReader(), log.isDebugEnabled(), true, 30);
- AbstractDistribZkTestBase.assertAllActive(collection, cluster.getSolrClient().getZkStateReader());
+ // note: NUM_SHARDS may be too small because of shard split, but that's okay?
+ ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
+ AbstractFullDistribZkTestBase.waitForCollection(zkStateReader, collection, NUM_SHARDS);
+ AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, zkStateReader, log.isDebugEnabled(), true, 30);
+ AbstractDistribZkTestBase.assertAllActive(collection, zkStateReader);
+ assertFalse(zkStateReader.getClusterState().getCollection(collection).getActiveSlices().isEmpty());
}
}