You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2020/11/01 00:17:41 UTC
[lucene-solr] branch branch_8x updated: SOLR-14969: Prevent
creating multiple cores with the same name which leads to instabilities
(race condition)
This is an automated email from the ASF dual-hosted git repository.
erick pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new b27c8b9 SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
b27c8b9 is described below
commit b27c8b90213cd388a9aa79deec7be753d10c9bb9
Author: Erick Erickson <Er...@gmail.com>
AuthorDate: Sat Oct 31 19:33:34 2020 -0400
SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
---
solr/CHANGES.txt | 3 +
.../java/org/apache/solr/core/CoreContainer.java | 133 ++++++++++++---------
.../org/apache/solr/core/TestCoreContainer.java | 103 ++++++++++++++--
3 files changed, 175 insertions(+), 64 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d2c8bff..0fe9149 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -30,6 +30,9 @@ Bug Fixes
* SOLR-14940: ReplicationHandler memory leak through SolrCore.closeHooks with unstable ZK connection. (Anver Sotnikov, Mike Drob)
+* SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
+ (Erick Erickson, Andreas Hubold)
+
Other Changes
---------------------
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index a0f8c11..1ae652b 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -29,6 +29,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
@@ -1261,6 +1262,7 @@ public class CoreContainer {
return create(coreName, cfg.getCoreRootDirectory().resolve(coreName), parameters, false);
}
+ Set<String> inFlightCreations = new HashSet<>(); // See SOLR-14969
/**
* Creates a new core in a specified instance directory, publishing the core state to the cluster
*
@@ -1270,80 +1272,96 @@ public class CoreContainer {
* @return the newly created core
*/
public SolrCore create(String coreName, Path instancePath, Map<String, String> parameters, boolean newCollection) {
+ SolrCore core = null;
+ boolean iAdded = false;
+ try {
+ synchronized (inFlightCreations) {
+ if (inFlightCreations.add(coreName)) {
+ iAdded = true;
+ } else {
+ String msg = "Already creating a core with name '" + coreName + "', call aborted '";
+ log.warn(msg);
+ throw new SolrException(ErrorCode.SERVER_ERROR, msg);
+ }
+ }
+ CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
- CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
-
- // TODO: There's a race here, isn't there?
- // Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
- if (getAllCoreNames().contains(coreName)) {
- log.warn("Creating a core with existing name is not allowed");
- // TODO: Shouldn't this be a BAD_REQUEST?
- throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
- }
+ // Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
+ if (getAllCoreNames().contains(coreName)) {
+ log.warn("Creating a core with existing name is not allowed: '{}'", coreName);
+ // TODO: Shouldn't this be a BAD_REQUEST?
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
+ }
- // Validate paths are relative to known locations to avoid path traversal
- assertPathAllowed(cd.getInstanceDir());
- assertPathAllowed(Paths.get(cd.getDataDir()));
+ // Validate paths are relative to known locations to avoid path traversal
+ assertPathAllowed(cd.getInstanceDir());
+ assertPathAllowed(Paths.get(cd.getDataDir()));
- boolean preExisitingZkEntry = false;
- try {
- if (getZkController() != null) {
+ boolean preExisitingZkEntry = false;
+ try {
+ if (getZkController() != null) {
if (!Overseer.isLegacy(getZkController().getZkStateReader())) {
if (cd.getCloudDescriptor().getCoreNodeName() == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "non legacy mode coreNodeName missing " + parameters.toString());
}
+ }
+ preExisitingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd);
}
- preExisitingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd);
- }
- // Much of the logic in core handling pre-supposes that the core.properties file already exists, so create it
- // first and clean it up if there's an error.
- coresLocator.create(this, cd);
-
- SolrCore core = null;
- try {
- solrCores.waitAddPendingCoreOps(cd.getName());
- core = createFromDescriptor(cd, true, newCollection);
- coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created
- } finally {
- solrCores.removeFromPendingOps(cd.getName());
- }
+ // Much of the logic in core handling pre-supposes that the core.properties file already exists, so create it
+ // first and clean it up if there's an error.
+ coresLocator.create(this, cd);
- return core;
- } catch (Exception ex) {
- // First clean up any core descriptor, there should never be an existing core.properties file for any core that
- // failed to be created on-the-fly.
- coresLocator.delete(this, cd);
- if (isZooKeeperAware() && !preExisitingZkEntry) {
try {
- getZkController().unregister(coreName, cd);
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- SolrException.log(log, null, e);
- } catch (KeeperException e) {
- SolrException.log(log, null, e);
- } catch (Exception e) {
- SolrException.log(log, null, e);
+ solrCores.waitAddPendingCoreOps(cd.getName());
+ core = createFromDescriptor(cd, true, newCollection);
+ coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created
+ } finally {
+ solrCores.removeFromPendingOps(cd.getName());
}
- }
- Throwable tc = ex;
- Throwable c = null;
- do {
- tc = tc.getCause();
- if (tc != null) {
- c = tc;
+ return core;
+ } catch (Exception ex) {
+ // First clean up any core descriptor, there should never be an existing core.properties file for any core that
+ // failed to be created on-the-fly.
+ coresLocator.delete(this, cd);
+ if (isZooKeeperAware() && !preExisitingZkEntry) {
+ try {
+ getZkController().unregister(coreName, cd);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ SolrException.log(log, null, e);
+ } catch (KeeperException e) {
+ SolrException.log(log, null, e);
+ } catch (Exception e) {
+ SolrException.log(log, null, e);
+ }
}
- } while (tc != null);
- String rootMsg = "";
- if (c != null) {
- rootMsg = " Caused by: " + c.getMessage();
- }
+ Throwable tc = ex;
+ Throwable c = null;
+ do {
+ tc = tc.getCause();
+ if (tc != null) {
+ c = tc;
+ }
+ } while (tc != null);
+
+ String rootMsg = "";
+ if (c != null) {
+ rootMsg = " Caused by: " + c.getMessage();
+ }
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- "Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
+ }
+ } finally {
+ synchronized (inFlightCreations) {
+ if (iAdded) {
+ inFlightCreations.remove(coreName);
+ }
+ }
}
}
@@ -1798,6 +1816,7 @@ public class CoreContainer {
}
if (cd == null) {
+ log.warn("Cannot unload non-existent core '{}'", name);
throw new SolrException(ErrorCode.BAD_REQUEST, "Cannot unload non-existent core [" + name + "]");
}
diff --git a/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java b/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java
index 06914b8..5852764 100644
--- a/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java
+++ b/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java
@@ -20,6 +20,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
+
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -119,22 +120,27 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
}
}
+ private static class TestReloadThread extends Thread {
+ private final CoreContainer cc;
+ TestReloadThread(CoreContainer cc) {
+ this.cc = cc;
+ }
+ @Override
+ public void run() {
+ cc.reload("core1");
+ }
+ }
+
@Test
public void testReloadThreaded() throws Exception {
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
cc.create("core1", ImmutableMap.of("configSet", "minimal"));
- class TestThread extends Thread {
- @Override
- public void run() {
- cc.reload("core1");
- }
- }
List<Thread> threads = new ArrayList<>();
int numThreads = 4;
for (int i = 0; i < numThreads; i++) {
- threads.add(new TestThread());
+ threads.add(new TestReloadThread(cc));
}
for (Thread thread : threads) {
@@ -149,6 +155,89 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
}
+ private static class TestCreateThread extends Thread {
+ final CoreContainer cc;
+ final String coreName;
+ boolean foundExpectedError = false;
+ SolrCore core = null;
+
+ TestCreateThread(CoreContainer cc, String coreName) {
+ this.cc = cc;
+ this.coreName = coreName;
+ }
+ @Override
+ public void run() {
+ try {
+ core = cc.create(coreName, ImmutableMap.of("configSet", "minimal"));
+ } catch (SolrException e) {
+ String msg = e.getMessage();
+ foundExpectedError = msg.contains("Already creating a core with name") || msg.contains("already exists");
+ }
+ }
+
+ int verifyMe() {
+ if (foundExpectedError) {
+ assertNull("failed create should have returned null for core", core);
+ return 0;
+ } else {
+ assertNotNull("Core should not be null if there was no error", core);
+ return 1;
+ }
+ }
+ }
+
+ @Test
+ public void testCreateThreaded() throws Exception {
+ final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
+ final int NUM_THREADS = 3;
+
+
+ // Try this a few times to increase the chances of failure.
+ for (int idx = 0; idx < 3; ++idx) {
+ TestCreateThread[] threads = new TestCreateThread[NUM_THREADS];
+ // Let's add a little stress in here by using the same core name each
+ // time around. The final unload in the loop should delete the core and
+ // allow the next time around to succeed.
+ // This also checks the bookkeeping in CoreContainer.create
+ // that prevents muliple simulatneous creations,
+ // currently "inFlightCreations"
+ String testName = "coreToTest";
+ for (int thread = 0; thread < NUM_THREADS; ++thread) {
+ threads[thread] = new TestCreateThread(cc, testName);
+ }
+ // only one of these should succeed.
+ for (int thread = 0; thread < NUM_THREADS; ++thread) {
+ threads[thread].start();
+ }
+
+ for (int thread = 0; thread < NUM_THREADS; ++thread) {
+ threads[thread].join();
+ }
+
+ // We can't guarantee that which thread failed, so go find it
+ int goodCount = 0;
+ for (int thread = 0; thread < NUM_THREADS; ++thread) {
+ goodCount += threads[thread].verifyMe();
+ }
+ assertEquals("Only one create should have succeeded", 1, goodCount);
+
+
+ // Check bookkeeping by removing and creating the core again, making sure
+ // we didn't leave the record of trying to create this core around.
+ // NOTE: unloading the core closes it too.
+ cc.unload(testName, true, true, true);
+ cc.create(testName, ImmutableMap.of("configSet", "minimal"));
+ // This call should fail with a different error because the core was
+ // created successfully.
+ SolrException thrown = expectThrows(SolrException.class, () ->
+ cc.create(testName, ImmutableMap.of("configSet", "minimal")));
+ assertTrue("Should have 'already exists' error", thrown.getMessage().contains("already exists"));
+
+ cc.unload(testName, true, true, true);
+ }
+ cc.shutdown();
+ }
+
@Test
public void testNoCores() throws Exception {