You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2020/04/09 15:21:42 UTC
[lucene-solr] branch jira/solr-12847 updated: SOLR-12847: Improved
rollback after failure, and the corresponding test.
This is an automated email from the ASF dual-hosted git repository.
ab pushed a commit to branch jira/solr-12847
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/jira/solr-12847 by this push:
new bd8a0d6 SOLR-12847: Improved rollback after failure, and the corresponding test.
bd8a0d6 is described below
commit bd8a0d638d719e4e3d5a9e64ec9fd6473c4ea723
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Thu Apr 9 17:20:49 2020 +0200
SOLR-12847: Improved rollback after failure, and the corresponding test.
---
.../cloud/api/collections/CreateCollectionCmd.java | 16 +++++++++++++---
.../src/java/org/apache/solr/util/TestInjection.java | 19 +++++++++++++++++++
.../solr/cloud/autoscaling/TestPolicyCloud.java | 16 ++++++++++++++++
3 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
index e686f25..0ded334 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
@@ -72,6 +72,7 @@ import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.admin.ConfigSetsHandlerApi;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.util.TestInjection;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
@@ -312,6 +313,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
}
shardRequestTracker.processResponses(results, shardHandler, false, null, Collections.emptySet());
+ TestInjection.injectCollectionCreateFailure();
failure = results.get("failure") != null && ((SimpleOrderedMap)results.get("failure")).size() > 0;
if (failure) {
// Let's cleanup as we hit an exception
@@ -355,8 +357,10 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
}
} catch (SolrException ex) {
+ failure = true;
throw ex;
} catch (Exception ex) {
+ failure = true;
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, null, ex);
} finally {
if (sessionWrapper.get() != null) sessionWrapper.get().release();
@@ -366,9 +370,15 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
}
}
- public static void restoreAutoScalingConfig(SolrCloudManager cloudManager, AutoScalingConfig config) throws IOException, InterruptedException {
+ public static void restoreAutoScalingConfig(SolrCloudManager cloudManager, AutoScalingConfig configToRestore) throws IOException, InterruptedException {
try {
- cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(config), config.getZkVersion());
+ // restore only if the ZK version advanced by exactly one since configToRestore, i.e. ours was the only update
+ AutoScalingConfig currentConfig = cloudManager.getDistribStateManager().getAutoScalingConfig();
+ if (currentConfig.getZkVersion() == configToRestore.getZkVersion() + 1) {
+ cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(configToRestore), currentConfig.getZkVersion());
+ } else {
+ log.warn("Cannot restore previous autoscaling config, someone else already modified it.");
+ }
} catch (BadVersionException | KeeperException e) {
log.warn("Error restoring autoscaling config", e);
}
@@ -524,7 +534,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
Policy.Session session = modifiedConfig.getPolicy().createSession(cloudManager);
// persist the modified config
try {
- cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(modifiedConfig), -1);
+ cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(modifiedConfig), initialConfig.getZkVersion());
configToRestore.set(initialConfig);
} catch (KeeperException | BadVersionException e) {
throw new IOException("Error adding " + MAX_SHARDS_PER_NODE + " policy rule for collection " + docCollection.getName(), e);
diff --git a/solr/core/src/java/org/apache/solr/util/TestInjection.java b/solr/core/src/java/org/apache/solr/util/TestInjection.java
index eb04f4e..fe6f3af 100644
--- a/solr/core/src/java/org/apache/solr/util/TestInjection.java
+++ b/solr/core/src/java/org/apache/solr/util/TestInjection.java
@@ -132,6 +132,8 @@ public class TestInjection {
public volatile static String reindexFailure = null;
+ public volatile static String collectionCreateFailure = null;
+
public volatile static String failIndexFingerprintRequests = null;
public volatile static String wrongIndexFingerprint = null;
@@ -178,6 +180,7 @@ public class TestInjection {
directUpdateLatch = null;
reindexLatch = null;
reindexFailure = null;
+ collectionCreateFailure = null;
prepRecoveryOpPauseForever = null;
countPrepRecoveryOpPauseForever = new AtomicInteger(0);
failIndexFingerprintRequests = null;
@@ -504,6 +507,22 @@ public class TestInjection {
return true;
}
+ public static boolean injectCollectionCreateFailure() { // test hook: may throw to simulate a failed collection creation, otherwise returns true
+ if (collectionCreateFailure != null) { // injection is attempted only when this static field has been set
+ Random rand = random();
+ if (null == rand) return true; // random() gave no Random: skip injection
+
+ Pair<Boolean,Integer> pair = parseValue(collectionCreateFailure); // read below as (enabled, chance out of 100)
+ boolean enabled = pair.first();
+ int chanceIn100 = pair.second();
+ if (enabled && rand.nextInt(100) >= (100 - chanceIn100)) { // nextInt(100) is 0..99, so for chanceIn100 in 0..100 this fires chanceIn100 times out of 100 (100 = always, 0 = never)
+ log.info("Test injection failure");
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Test injection failure");
+ }
+ }
+ return true;
+ }
+
private static Pair<Boolean,Integer> parseValue(final String raw) {
if (raw == null) return new Pair<>(false, 0);
Matcher m = ENABLED_PERCENT.matcher(raw);
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java
index bc086c8..59ccdd2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java
@@ -60,6 +60,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
+import org.apache.solr.util.TestInjection;
import org.apache.solr.util.TimeOut;
import org.junit.After;
import org.junit.BeforeClass;
@@ -86,6 +87,7 @@ public class TestPolicyCloud extends SolrCloudTestCase {
@After
public void after() throws Exception {
+ TestInjection.reset();
cluster.deleteAllCollections();
cluster.getSolrClient().getZkStateReader().getZkClient().setData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH,
"{}".getBytes(StandardCharsets.UTF_8), true);
@@ -599,6 +601,20 @@ public class TestPolicyCloud extends SolrCloudTestCase {
policies = autoScalingConfig.getPolicy().getPolicies();
assertNull("auto-create policy still exists after collection has been deleted: " + policies, policies.get(policyName));
+ // test the cleanup after failed creation
+ TestInjection.collectionCreateFailure = "true:100";
+ try {
+ CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
+ .setMaxShardsPerNode(1)
+ .process(cluster.getSolrClient());
+ fail("should have failed due to injection failure");
+ } catch (Exception e) {
+ assertTrue(e.toString().contains("injection failure"));
+ autoScalingConfig = cluster.getSolrClient().getZkStateReader().getAutoScalingConfig();
+ policies = autoScalingConfig.getPolicy().getPolicies();
+ assertNull("auto-create policy still exists after collection has been deleted: " + policies, policies.get(policyName));
+ }
+
}
}