You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2020/04/09 15:21:42 UTC

[lucene-solr] branch jira/solr-12847 updated: SOLR-12847: Improved rollback after failure, and the corresponding test.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch jira/solr-12847
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/solr-12847 by this push:
     new bd8a0d6  SOLR-12847: Improved rollback after failure, and the corresponding test.
bd8a0d6 is described below

commit bd8a0d638d719e4e3d5a9e64ec9fd6473c4ea723
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Thu Apr 9 17:20:49 2020 +0200

    SOLR-12847: Improved rollback after failure, and the corresponding test.
---
 .../cloud/api/collections/CreateCollectionCmd.java    | 16 +++++++++++++---
 .../src/java/org/apache/solr/util/TestInjection.java  | 19 +++++++++++++++++++
 .../solr/cloud/autoscaling/TestPolicyCloud.java       | 16 ++++++++++++++++
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
index e686f25..0ded334 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
@@ -72,6 +72,7 @@ import org.apache.solr.common.util.Utils;
 import org.apache.solr.handler.admin.ConfigSetsHandlerApi;
 import org.apache.solr.handler.component.ShardHandler;
 import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.util.TestInjection;
 import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
@@ -312,6 +313,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       }
 
       shardRequestTracker.processResponses(results, shardHandler, false, null, Collections.emptySet());
+      TestInjection.injectCollectionCreateFailure();
       failure = results.get("failure") != null && ((SimpleOrderedMap)results.get("failure")).size() > 0;
       if (failure) {
         // Let's cleanup as we hit an exception
@@ -355,8 +357,10 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       }
 
     } catch (SolrException ex) {
+      failure = true;
       throw ex;
     } catch (Exception ex) {
+      failure = true;
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, null, ex);
     } finally {
       if (sessionWrapper.get() != null) sessionWrapper.get().release();
@@ -366,9 +370,15 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
     }
   }
 
-  public static void restoreAutoScalingConfig(SolrCloudManager cloudManager, AutoScalingConfig config) throws IOException, InterruptedException {
+  public static void restoreAutoScalingConfig(SolrCloudManager cloudManager, AutoScalingConfig configToRestore) throws IOException, InterruptedException {
     try {
-      cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(config), config.getZkVersion());
+      // check that only we updated the config
+      AutoScalingConfig currentConfig = cloudManager.getDistribStateManager().getAutoScalingConfig();
+      if (currentConfig.getZkVersion() == configToRestore.getZkVersion() + 1) {
+        cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(configToRestore), currentConfig.getZkVersion());
+      } else {
+        log.warn("Cannot restore previous autoscaling config, someone else already modified it.");
+      }
     } catch (BadVersionException | KeeperException e) {
       log.warn("Error restoring autoscaling config", e);
     }
@@ -524,7 +534,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
     Policy.Session session = modifiedConfig.getPolicy().createSession(cloudManager);
     // persist the modified config
     try {
-      cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(modifiedConfig), -1);
+      cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(modifiedConfig), initialConfig.getZkVersion());
       configToRestore.set(initialConfig);
     } catch (KeeperException | BadVersionException e) {
       throw new IOException("Error adding " + MAX_SHARDS_PER_NODE + " policy rule for collection " + docCollection.getName(), e);
diff --git a/solr/core/src/java/org/apache/solr/util/TestInjection.java b/solr/core/src/java/org/apache/solr/util/TestInjection.java
index eb04f4e..fe6f3af 100644
--- a/solr/core/src/java/org/apache/solr/util/TestInjection.java
+++ b/solr/core/src/java/org/apache/solr/util/TestInjection.java
@@ -132,6 +132,8 @@ public class TestInjection {
 
   public volatile static String reindexFailure = null;
 
+  public volatile static String collectionCreateFailure = null;
+
   public volatile static String failIndexFingerprintRequests = null;
 
   public volatile static String wrongIndexFingerprint = null;
@@ -178,6 +180,7 @@ public class TestInjection {
     directUpdateLatch = null;
     reindexLatch = null;
     reindexFailure = null;
+    collectionCreateFailure = null;
     prepRecoveryOpPauseForever = null;
     countPrepRecoveryOpPauseForever = new AtomicInteger(0);
     failIndexFingerprintRequests = null;
@@ -504,6 +507,22 @@ public class TestInjection {
     return true;
   }
 
+  public static boolean injectCollectionCreateFailure() { // test hook: may throw to simulate a failed collection creation; otherwise returns true
+    if (collectionCreateFailure != null) { // injection is considered only when the static spec string has been set
+      Random rand = random();
+      if (null == rand) return true; // no Random available: skip injection and report success
+
+      Pair<Boolean,Integer> pair = parseValue(collectionCreateFailure); // spec parsed into (enabled, chance); the test in this commit sets "true:100"
+      boolean enabled = pair.first();
+      int chanceIn100 = pair.second();
+      if (enabled && rand.nextInt(100) >= (100 - chanceIn100)) { // nextInt(100) is in 0..99, so this fires with probability chanceIn100/100 (always at 100)
+        log.info("Test injection failure");
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Test injection failure"); // surfaces to the caller as a server error
+      }
+    }
+    return true; // reached whenever no failure was injected
+  }
+
   private static Pair<Boolean,Integer> parseValue(final String raw) {
     if (raw == null) return new Pair<>(false, 0);
     Matcher m = ENABLED_PERCENT.matcher(raw);
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java
index bc086c8..59ccdd2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java
@@ -60,6 +60,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
+import org.apache.solr.util.TestInjection;
 import org.apache.solr.util.TimeOut;
 import org.junit.After;
 import org.junit.BeforeClass;
@@ -86,6 +87,7 @@ public class TestPolicyCloud extends SolrCloudTestCase {
 
   @After
   public void after() throws Exception {
+    TestInjection.reset();
     cluster.deleteAllCollections();
     cluster.getSolrClient().getZkStateReader().getZkClient().setData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH,
         "{}".getBytes(StandardCharsets.UTF_8), true);
@@ -599,6 +601,20 @@ public class TestPolicyCloud extends SolrCloudTestCase {
     policies = autoScalingConfig.getPolicy().getPolicies();
     assertNull("auto-create policy still exists after collection has been deleted: " + policies, policies.get(policyName));
 
+    // test the cleanup after failed creation
+    TestInjection.collectionCreateFailure = "true:100";
+    try {
+      CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
+          .setMaxShardsPerNode(1)
+          .process(cluster.getSolrClient());
+      fail("should have failed due to injection failure");
+    } catch (Exception e) {
+      assertTrue(e.toString().contains("injection failure"));
+      autoScalingConfig = cluster.getSolrClient().getZkStateReader().getAutoScalingConfig();
+      policies = autoScalingConfig.getPolicy().getPolicies();
+      assertNull("auto-create policy still exists after collection has been deleted: " + policies, policies.get(policyName));
+    }
+
   }
 
 }