You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/09/03 17:07:24 UTC
svn commit: r1701043 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
core/src/java/org/apache/solr/update/SolrCoreState.java
Author: yonik
Date: Thu Sep 3 15:07:24 2015
New Revision: 1701043
URL: http://svn.apache.org/r1701043
Log:
SOLR-7836: change SolrCoreState to use a reentrant locking implementation to fix deadlocks
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1701043&r1=1701042&r2=1701043&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Sep 3 15:07:24 2015
@@ -153,7 +153,7 @@ Bug Fixes
whitelist valid uses of currentTimeMillis (Ramkumar Aiyengar)
* SOLR-7836: Possible deadlock when closing refcounted index writers.
- (Jessica Cheng Mallet, Erick Erickson)
+ (Jessica Cheng Mallet, Erick Erickson, Mark Miller, yonik)
* SOLR-7869: Overseer does not handle BadVersionException correctly and, in some cases,
can go into an infinite loop if cluster state in ZooKeeper is modified externally.
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java?rev=1701043&r1=1701042&r2=1701043&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java Thu Sep 3 15:07:24 2015
@@ -18,8 +18,10 @@ package org.apache.solr.update;
*/
import java.io.IOException;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.lucene.index.IndexWriter;
import org.apache.solr.cloud.RecoveryStrategy;
@@ -45,10 +47,11 @@ public final class DefaultSolrCoreState
private final ActionThrottle recoveryThrottle = new ActionThrottle("recovery", 10000);
private final ActionThrottle leaderThrottle = new ActionThrottle("leader", 5000);
-
- // protects pauseWriter and writerFree
- private final Object writerPauseLock = new Object();
-
+
+ // Use the readLock to retrieve the current IndexWriter (may be lazily opened)
+ // Use the writeLock for changing index writers
+ private final ReentrantReadWriteLock iwLock = new ReentrantReadWriteLock();
+
private SolrIndexWriter indexWriter = null;
private DirectoryFactory directoryFactory;
@@ -57,9 +60,6 @@ public final class DefaultSolrCoreState
private volatile boolean lastReplicationSuccess = true;
private RefCounted<IndexWriter> refCntWriter;
-
- private boolean pauseWriter;
- private boolean writerFree = true;
protected final ReentrantLock commitLock = new ReentrantLock();
@@ -86,189 +86,141 @@ public final class DefaultSolrCoreState
@Override
public RefCounted<IndexWriter> getIndexWriter(SolrCore core)
throws IOException {
- synchronized (writerPauseLock) {
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "SolrCoreState already closed");
- }
-
- while (pauseWriter) {
- try {
- writerPauseLock.wait(100);
- } catch (InterruptedException e) {}
-
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Already closed");
- }
- }
- if (core == null) {
- // core == null is a signal to just return the current writer, or null
- // if none.
- initRefCntWriter();
- if (refCntWriter == null) return null;
- } else {
- if (indexWriter == null) {
- indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
- }
- initRefCntWriter();
+ boolean succeeded = false;
+ lock(iwLock.readLock());
+ try {
+ // Multiple readers may be executing this, but we only want one to open the writer on demand.
+ synchronized (this) {
+ if (core == null) {
+ // core == null is a signal to just return the current writer, or null if none.
+ initRefCntWriter();
+ if (refCntWriter == null) return null;
+ } else {
+ if (indexWriter == null) {
+ indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
+ }
+ initRefCntWriter();
+ }
+
+ refCntWriter.incref();
+ succeeded = true; // the returned RefCounted<IndexWriter> will release the readLock on a decref()
+ return refCntWriter;
+ }
+
+ } finally {
+ // if we failed to return the IW for some other reason, we should unlock.
+ if (!succeeded) {
+ iwLock.readLock().unlock();
}
-
- writerFree = false;
- refCntWriter.incref();
- writerPauseLock.notifyAll();
- return refCntWriter;
-
}
+
}
private void initRefCntWriter() {
+ // TODO: since we moved to a read-write lock, and don't rely on the count to close the writer, we don't really
+ // need this class any more. It could also be a singleton created at the same time as SolrCoreState
+ // or we could change the API of SolrCoreState to just return the writer and then add a releaseWriter() call.
if (refCntWriter == null && indexWriter != null) {
refCntWriter = new RefCounted<IndexWriter>(indexWriter) {
+
+ @Override
+ public void decref() {
+ iwLock.readLock().unlock();
+ super.decref(); // This is now redundant (since we switched to read-write locks), we don't really need to maintain our own reference count.
+ }
+
@Override
public void close() {
- synchronized (writerPauseLock) {
- writerFree = true;
- writerPauseLock.notifyAll();
- }
+ // We rely on other code to actually close the IndexWriter, and there's nothing special to do when the ref count hits 0
}
};
}
}
- @Override
- public synchronized void newIndexWriter(SolrCore core, boolean rollback) throws IOException {
- log.info("Creating new IndexWriter...");
- String coreName = core.getName();
- synchronized (writerPauseLock) {
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Already closed");
- }
-
- // we need to wait for the Writer to fall out of use
- // first lets stop it from being lent out
- pauseWriter = true;
- // then lets wait until it's out of use
- log.info("Waiting until IndexWriter is unused... core=" + coreName);
-
+ // acquires the lock or throws an exception if the CoreState has been closed.
+ private void lock(Lock lock) {
+ boolean acquired = false;
+ do {
try {
- while (!writerFree) {
- try {
- writerPauseLock.wait(100);
- } catch (InterruptedException e) {
- }
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "SolrCoreState already closed");
- }
- }
+ acquired = lock.tryLock(100, TimeUnit.MILLISECONDS);
+ } catch (InterruptedException e) {
+ log.warn("WARNING - Dangerous interrupt", e);
+ }
- if (indexWriter != null) {
- if (!rollback) {
- try {
- log.info("Closing old IndexWriter... core=" + coreName);
- indexWriter.close();
- } catch (Exception e) {
- SolrException.log(log, "Error closing old IndexWriter. core="
- + coreName, e);
- }
- } else {
- try {
- log.info("Rollback old IndexWriter... core=" + coreName);
- indexWriter.rollback();
- } catch (Exception e) {
- SolrException.log(log, "Error rolling back old IndexWriter. core="
- + coreName, e);
- }
- }
+ // even if we failed to acquire, check if we are closed
+ if (closed) {
+ if (acquired) {
+ lock.unlock();
}
- indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
- log.info("New IndexWriter is ready to be used.");
- // we need to null this so it picks up the new writer next get call
- refCntWriter = null;
- } finally {
- pauseWriter = false;
- writerPauseLock.notifyAll();
+ throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "SolrCoreState already closed.");
}
- }
+ } while (!acquired);
}
-
- @Override
- public synchronized void closeIndexWriter(SolrCore core, boolean rollback)
- throws IOException {
- log.info("Closing IndexWriter...");
+
+ // closes and opens index writers without any locking
+ private void changeWriter(SolrCore core, boolean rollback, boolean openNewWriter) throws IOException {
String coreName = core.getName();
- synchronized (writerPauseLock) {
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Already closed");
- }
-
- // we need to wait for the Writer to fall out of use
- // first lets stop it from being lent out
- pauseWriter = true;
- // then lets wait until it's out of use
- log.info("Waiting until IndexWriter is unused... core=" + coreName);
- try {
- while (!writerFree) {
- try {
- writerPauseLock.wait(100);
- } catch (InterruptedException e) {
- }
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
- "SolrCoreState already closed");
- }
- }
+ // We need to null this so it picks up the new writer next get call.
+ // We do this before anything else in case we hit an exception.
+ refCntWriter = null;
+ IndexWriter iw = indexWriter; // temp reference just for closing
+ indexWriter = null; // null this out now in case we fail, so we won't use the writer again
- if (indexWriter != null) {
- if (!rollback) {
- try {
- log.info("Closing old IndexWriter... core=" + coreName);
- indexWriter.close();
- } catch (Exception e) {
- SolrException.log(log, "Error closing old IndexWriter. core="
- + coreName, e);
- }
- } else {
- try {
- log.info("Rollback old IndexWriter... core=" + coreName);
- indexWriter.rollback();
- } catch (Exception e) {
- SolrException.log(log, "Error rolling back old IndexWriter. core="
- + coreName, e);
- }
- }
+ if (iw != null) {
+ if (!rollback) {
+ try {
+ log.info("Closing old IndexWriter... core=" + coreName);
+ iw.close();
+ } catch (Exception e) {
+ SolrException.log(log, "Error closing old IndexWriter. core=" + coreName, e);
+ }
+ } else {
+ try {
+ log.info("Rollback old IndexWriter... core=" + coreName);
+ iw.rollback();
+ } catch (Exception e) {
+ SolrException.log(log, "Error rolling back old IndexWriter. core=" + coreName, e);
}
- } finally {
- pauseWriter = false;
- writerPauseLock.notifyAll();
}
}
+ if (openNewWriter) {
+ indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
+ log.info("New IndexWriter is ready to be used.");
+ }
}
-
+
@Override
- public synchronized void openIndexWriter(SolrCore core) throws IOException {
- log.info("Creating new IndexWriter...");
- synchronized (writerPauseLock) {
- if (closed) {
- throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Already closed");
- }
-
- try {
- indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
- log.info("New IndexWriter is ready to be used.");
- // we need to null this so it picks up the new writer next get call
- refCntWriter = null;
- } finally {
- pauseWriter = false;
- writerPauseLock.notifyAll();
- }
+ public void newIndexWriter(SolrCore core, boolean rollback) throws IOException {
+ lock(iwLock.writeLock());
+ try {
+ changeWriter(core, rollback, true);
+ } finally {
+ iwLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void closeIndexWriter(SolrCore core, boolean rollback) throws IOException {
+ lock(iwLock.writeLock());
+ changeWriter(core, rollback, false);
+ // Do not unlock the writeLock in this method. It will be unlocked by the openIndexWriter call (see base class javadoc)
+ }
+
+ @Override
+ public void openIndexWriter(SolrCore core) throws IOException {
+ try {
+ changeWriter(core, false, true);
+ } finally {
+ iwLock.writeLock().unlock(); //unlock even if we failed
}
}
@Override
- public synchronized void rollbackIndexWriter(SolrCore core) throws IOException {
- newIndexWriter(core, true);
+ public void rollbackIndexWriter(SolrCore core) throws IOException {
+ changeWriter(core, true, true);
}
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name) throws IOException {
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java?rev=1701043&r1=1701042&r2=1701043&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java Thu Sep 3 15:07:24 2015
@@ -238,12 +238,12 @@ public class DirectUpdateHandler2 extend
} else {
writer.addDocument(cmd.getLuceneDocument());
}
+ if (ulog != null) ulog.add(cmd);
+
} finally {
iw.decref();
}
- synchronized (solrCoreState.getUpdateLock()) {
- if (ulog != null) ulog.add(cmd);
- }
+
}
private void doNormalUpdate(AddUpdateCommand cmd) throws IOException {
@@ -278,20 +278,23 @@ public class DirectUpdateHandler2 extend
bq.add(new BooleanClause(new TermQuery(idTerm), Occur.MUST));
writer.deleteDocuments(new DeleteByQueryWrapper(bq.build(), core.getLatestSchema()));
}
+
+
+ // Add to the transaction log *after* successfully adding to the
+ // index, if there was no error.
+ // This ordering ensures that if we log it, it's definitely been
+ // added to the the index.
+ // This also ensures that if a commit sneaks in-between, that we
+ // know everything in a particular
+ // log version was definitely committed.
+ if (ulog != null) ulog.add(cmd);
+
} finally {
iw.decref();
}
- // Add to the transaction log *after* successfully adding to the
- // index, if there was no error.
- // This ordering ensures that if we log it, it's definitely been
- // added to the the index.
- // This also ensures that if a commit sneaks in-between, that we
- // know everything in a particular
- // log version was definitely committed.
- synchronized (solrCoreState.getUpdateLock()) {
- if (ulog != null) ulog.add(cmd);
- }
+
+
}
private void addAndDelete(AddUpdateCommand cmd, List<UpdateLog.DBQ> deletesAfter) throws IOException {
@@ -323,13 +326,12 @@ public class DirectUpdateHandler2 extend
for (Query q : dbqList) {
writer.deleteDocuments(new DeleteByQueryWrapper(q, core.getLatestSchema()));
}
+ if (ulog != null) ulog.add(cmd, true); // this needs to be protected by update lock
}
} finally {
iw.decref();
}
- synchronized (solrCoreState.getUpdateLock()) {
- if (ulog != null) ulog.add(cmd, true);
- }
+
}
private void updateDeleteTrackers(DeleteUpdateCommand cmd) {
@@ -434,6 +436,9 @@ public class DirectUpdateHandler2 extend
// part of a commit. DBQ needs to signal that a fresh reader will be needed for
// a realtime view of the index. When a new searcher is opened after a DBQ, that
// flag can be cleared. If those thing happen concurrently, it's not thread safe.
+ // Also, ulog.deleteByQuery clears caches and is thus not safe to be called between
+ // preSoftCommit/postSoftCommit and thus we use the updateLock to prevent this (just
+ // as we use around ulog.preCommit... also see comments in ulog.postSoftCommit)
//
synchronized (solrCoreState.getUpdateLock()) {
if (delAll) {
@@ -447,7 +452,7 @@ public class DirectUpdateHandler2 extend
}
}
- if (ulog != null) ulog.deleteByQuery(cmd);
+ if (ulog != null) ulog.deleteByQuery(cmd); // this needs to be protected by the update lock
}
madeIt = true;
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrCoreState.java?rev=1701043&r1=1701042&r2=1701043&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrCoreState.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrCoreState.java Thu Sep 3 15:07:24 2015
@@ -39,10 +39,10 @@ public abstract class SolrCoreState {
public static Logger log = LoggerFactory.getLogger(SolrCoreState.class);
protected boolean closed = false;
- private final Object deleteLock = new Object();
+ private final Object updateLock = new Object();
public Object getUpdateLock() {
- return deleteLock;
+ return updateLock;
}
private int solrCoreStateRefCnt = 1;