You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2009/02/26 14:18:19 UTC
svn commit: r748135 - in
/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene:
AbstractIndex.java CachingIndexReader.java DocId.java IndexMerger.java
Author: mreutegg
Date: Thu Feb 26 13:18:19 2009
New Revision: 748135
URL: http://svn.apache.org/viewvc?rev=748135&view=rev
Log:
JCR-1337: Optimize first execution queries for DescendantSelfAxisWeight/ChildAxisQuery
JCR-1884: CachingIndexReader.initializeParents() does not scale well with large indexes
Modified:
jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java
jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java
Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Thu Feb 26 13:18:19 2009
@@ -232,10 +232,12 @@
* read-only, that is, any attempt to delete a document from the index
* will throw an <code>UnsupportedOperationException</code>.
*
+ * @param initCache if the caches in the index reader should be initialized
+ * before the index reader is returned.
* @return a read-only index reader.
* @throws IOException if an error occurs while obtaining the index reader.
*/
- synchronized ReadOnlyIndexReader getReadOnlyIndexReader()
+ synchronized ReadOnlyIndexReader getReadOnlyIndexReader(boolean initCache)
throws IOException {
// get current modifiable index reader
CommittableIndexReader modifiableReader = getIndexReader();
@@ -271,7 +273,8 @@
}
if (sharedReader == null) {
// create new shared reader
- CachingIndexReader cr = new CachingIndexReader(IndexReader.open(getDirectory()), cache);
+ CachingIndexReader cr = new CachingIndexReader(
+ IndexReader.open(getDirectory()), cache, initCache);
sharedReader = new SharedIndexReader(cr);
}
readOnlyReader = new ReadOnlyIndexReader(sharedReader, deleted, modCount);
@@ -280,6 +283,20 @@
}
/**
+ * Returns a read-only index reader, that can be used concurrently with
+ * other threads writing to this index. The returned index reader is
+ * read-only, that is, any attempt to delete a document from the index
+ * will throw an <code>UnsupportedOperationException</code>.
+ * <p>
+ * This is a shortcut for <code>getReadOnlyIndexReader(false)</code>, i.e.
+ * the caches of the index reader are not required to be initialized
+ * before the reader is returned.
+ *
+ * @return a read-only index reader.
+ * @throws IOException if an error occurs while obtaining the index reader.
+ */
+ protected ReadOnlyIndexReader getReadOnlyIndexReader()
+ throws IOException {
+ return getReadOnlyIndexReader(false);
+ }
+
+ /**
* Returns an <code>IndexWriter</code> on this index.
* @return an <code>IndexWriter</code> on this index.
* @throws IOException if the writer cannot be obtained.
Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Thu Feb 26 13:18:19 2009
@@ -22,11 +22,20 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
+import org.apache.jackrabbit.uuid.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.BitSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.text.NumberFormat;
+
+import EDU.oswego.cs.dl.util.concurrent.Executor;
+import EDU.oswego.cs.dl.util.concurrent.PooledExecutor;
+import EDU.oswego.cs.dl.util.concurrent.LinkedQueue;
/**
* Implements an <code>IndexReader</code> that maintains caches to resolve
@@ -41,6 +50,17 @@
private static final Logger log = LoggerFactory.getLogger(CachingIndexReader.class);
/**
+ * The single thread of this executor initializes the
+ * {@link #parents} when background initialization is requested.
+ * <p>
+ * The executor is static and therefore shared by all
+ * <code>CachingIndexReader</code> instances. It is created with a
+ * <code>LinkedQueue</code> and the value <code>1</code> (presumably the
+ * maximum pool size, hence "single thread" - confirm against the
+ * util.concurrent <code>PooledExecutor</code> documentation). The
+ * instance initializer sets the keep alive time to <code>500</code>
+ * (presumably milliseconds - confirm against the same documentation).
+ */
+ private static final Executor SERIAL_EXECUTOR = new PooledExecutor(
+ new LinkedQueue(), 1) {
+ {
+ setKeepAliveTime(500);
+ }
+ };
+
+ /**
* The current value of the global creation tick counter.
*/
private static long currentTick;
@@ -53,6 +73,11 @@
private final DocId[] parents;
/**
+ * Initializes the {@link #parents} cache.
+ */
+ private CacheInitializer cacheInitializer;
+
+ /**
* Tick when this index reader was created.
*/
private final long creationTick = getNextCreationTick();
@@ -69,11 +94,26 @@
* @param delegatee the base <code>IndexReader</code>.
* @param cache a document number cache, or <code>null</code> if not
* available to this reader.
- */
- CachingIndexReader(IndexReader delegatee, DocNumberCache cache) {
+ * @param initCache if the {@link #parents} cache should be initialized
+ * when this index reader is constructed. Otherwise
+ * initialization happens in a background thread.
+ */
+ CachingIndexReader(IndexReader delegatee,
+                    DocNumberCache cache,
+                    boolean initCache) {
      super(delegatee);
      this.cache = cache;
      // one slot per document number of the underlying reader
      parents = new DocId[delegatee.maxDoc()];
+     this.cacheInitializer = new CacheInitializer(delegatee);
+     if (initCache) {
+         // the caller asked for an initialized reader: fill the parents
+         // cache synchronously in the calling thread
+         cacheInitializer.run();
+     } else {
+         try {
+             // defer the initialization to the shared background executor
+             SERIAL_EXECUTOR.execute(cacheInitializer);
+         } catch (InterruptedException e) {
+             // The initializer could not be handed over to the executor.
+             // Do not swallow the interrupt: restore the flag so that
+             // the caller can still observe and react to it.
+             Thread.currentThread().interrupt();
+         }
+     }
  }
/**
@@ -200,6 +240,14 @@
return super.termDocs(term);
}
+ /**
+  * Requests the cache initializer to stop, waits until it is no longer
+  * running and then closes the underlying reader.
+  * <p>
+  * If the calling thread is interrupted while waiting, the wait is
+  * abandoned and the underlying reader is closed anyway. The interrupt
+  * is not lost: the interrupt flag of the calling thread is restored
+  * once the close has been attempted.
+  *
+  * @throws IOException if an error occurs while closing the reader.
+  */
+ protected void doClose() throws IOException {
+     boolean interrupted = false;
+     try {
+         cacheInitializer.waitUntilStopped();
+     } catch (InterruptedException e) {
+         // remember the interrupt, closing must still take place
+         interrupted = true;
+     }
+     try {
+         super.doClose();
+     } finally {
+         if (interrupted) {
+             // re-assert the interrupt only after the close, so the
+             // close itself runs with the same flag state as before
+             Thread.currentThread().interrupt();
+         }
+     }
+ }
//----------------------< internal >----------------------------------------
@@ -215,6 +263,216 @@
}
/**
+ * Initializes the {@link CachingIndexReader#parents} cache.
+ */
+ private class CacheInitializer implements Runnable {
+
+     /**
+      * From where to read.
+      */
+     private final IndexReader reader;
+
+     /**
+      * Set to <code>true</code> while this initializer does its work.
+      * Only read and written while holding the monitor of this
+      * initializer.
+      */
+     private boolean running = false;
+
+     /**
+      * Set to <code>true</code> when this index reader is about to be closed.
+      */
+     private volatile boolean stopRequested = false;
+
+     /**
+      * Creates a new initializer with the given <code>reader</code>.
+      *
+      * @param reader an index reader.
+      */
+     public CacheInitializer(IndexReader reader) {
+         this.reader = reader;
+     }
+
+     /**
+      * Initializes the cache. Exceptions thrown by the initialization
+      * are caught here; they are logged as a warning unless a stop has
+      * been requested. Threads blocked in {@link #waitUntilStopped()}
+      * are notified when this method is done.
+      */
+     public void run() {
+         synchronized (this) {
+             running = true;
+         }
+         try {
+             if (stopRequested) {
+                 // immediately return when stop is requested
+                 return;
+             }
+             initializeParents(reader);
+         } catch (Exception e) {
+             // only log warn message during regular operation
+             if (!stopRequested) {
+                 log.warn("Error initializing parents cache.", e);
+             }
+         } finally {
+             synchronized (this) {
+                 running = false;
+                 notifyAll();
+             }
+         }
+     }
+
+     /**
+      * Requests this cache initializer to stop and waits until it is no
+      * longer running. Returns immediately if {@link #run()} is not
+      * executing at the moment; a later call to {@link #run()} then
+      * returns without doing any work.
+      *
+      * @throws InterruptedException if the current thread is interrupted.
+      */
+     public void waitUntilStopped() throws InterruptedException {
+         stopRequested = true;
+         synchronized (this) {
+             while (running) {
+                 wait();
+             }
+         }
+     }
+
+     /**
+      * Initializes the {@link CachingIndexReader#parents} <code>DocId</code>
+      * array.
+      * <p>
+      * Works in three steps: collect a {@link NodeInfo} per document from
+      * the UUID terms (keyed by document number), attach the parent UUID
+      * from the PARENT terms (re-keying the info by its own UUID), and
+      * finally resolve each parent UUID to a document number of this
+      * reader where possible. The array is left untouched if a stop is
+      * requested before the last step.
+      *
+      * @param reader the underlying index reader.
+      * @throws IOException if an error occurs while reading from the index.
+      */
+     private void initializeParents(IndexReader reader) throws IOException {
+         long time = System.currentTimeMillis();
+         // Integer (document number) -> NodeInfo after the first pass,
+         // nodes with a parent are re-keyed UUID -> NodeInfo in the second
+         final Map docs = new HashMap();
+         // read UUIDs
+         collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector() {
+             public void collect(Term term, TermDocs tDocs) throws IOException {
+                 UUID uuid = UUID.fromString(term.text());
+                 if (tDocs.next()) {
+                     NodeInfo info = new NodeInfo(tDocs.doc(), uuid);
+                     docs.put(new Integer(info.docId), info);
+                 }
+             }
+         });
+
+         if (stopRequested) {
+             // the UUID pass may be incomplete, no point in reading parents
+             return;
+         }
+
+         // read PARENTs
+         collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector() {
+             public void collect(Term term, TermDocs tDocs) throws IOException {
+                 // parsed lazily and only once per term, not once per document
+                 UUID parentUUID = null;
+                 while (tDocs.next()) {
+                     Integer docId = new Integer(tDocs.doc());
+                     NodeInfo info = (NodeInfo) docs.remove(docId);
+                     if (info == null) {
+                         // no info was collected for this document in the
+                         // UUID pass (or it was already re-keyed): skip it
+                         // instead of failing with a NullPointerException
+                         continue;
+                     }
+                     if (parentUUID == null) {
+                         parentUUID = UUID.fromString(term.text());
+                     }
+                     info.parent = parentUUID;
+                     // from now on the node is looked up by its own UUID
+                     docs.put(info.uuid, info);
+                 }
+             }
+         });
+
+         if (stopRequested) {
+             return;
+         }
+
+         double foreignParents = 0;
+         Iterator it = docs.values().iterator();
+         while (it.hasNext()) {
+             NodeInfo info = (NodeInfo) it.next();
+             NodeInfo parent = (NodeInfo) docs.get(info.parent);
+             if (parent != null) {
+                 // parent lives in this reader: reference it by document number
+                 parents[info.docId] = DocId.create(parent.docId);
+             } else if (info.parent != null) {
+                 // parent is not known to this reader: reference it by UUID
+                 foreignParents++;
+                 parents[info.docId] = DocId.create(info.parent);
+             } else {
+                 // no parent -> root node
+                 parents[info.docId] = DocId.NULL;
+             }
+         }
+         if (log.isDebugEnabled()) {
+             NumberFormat nf = NumberFormat.getPercentInstance();
+             nf.setMaximumFractionDigits(1);
+             time = System.currentTimeMillis() - time;
+             if (parents.length > 0) {
+                 // turn the absolute count into a ratio for the percent format
+                 foreignParents /= parents.length;
+             }
+             log.debug("initialized {} DocIds in {} ms, {} foreign parents",
+                     new Object[]{
+                         new Integer(parents.length),
+                         new Long(time),
+                         nf.format(foreignParents)
+                     });
+         }
+     }
+
+     /**
+      * Collects term docs for a given start term. All terms with the same
+      * field as <code>start</code> are enumerated. The enumeration ends
+      * early when a stop is requested; this is checked every 10000 terms.
+      *
+      * @param reader the index reader.
+      * @param start the term where to start the term enumeration.
+      * @param collector collects the term docs for each term.
+      * @throws IOException if an error occurs while reading from the index.
+      */
+     private void collectTermDocs(IndexReader reader,
+                                  Term start,
+                                  TermDocsCollector collector)
+             throws IOException {
+         TermDocs tDocs = reader.termDocs();
+         try {
+             TermEnum terms = reader.terms(start);
+             try {
+                 int count = 0;
+                 do {
+                     Term t = terms.term();
+                     // NOTE(review): identity comparison of the field names,
+                     // presumably relying on Lucene interning them - confirm
+                     if (t != null && t.field() == start.field()) {
+                         tDocs.seek(terms);
+                         collector.collect(t, tDocs);
+                     } else {
+                         break;
+                     }
+                     // once in a while check if we should quit
+                     if (++count % 10000 == 0) {
+                         if (stopRequested) {
+                             break;
+                         }
+                     }
+                 } while (terms.next());
+             } finally {
+                 terms.close();
+             }
+         } finally {
+             tDocs.close();
+         }
+     }
+ }
+
+ /**
+ * Simple interface to collect a term and its term docs. Implementations
+ * are handed to <code>collectTermDocs()</code>, which calls
+ * {@link #collect(Term, TermDocs)} once per enumerated term.
+ */
+ private interface TermDocsCollector {
+
+ /**
+ * Called for each term encountered. The <code>tDocs</code> instance has
+ * already been positioned on <code>term</code> by the caller and is
+ * reused for every term, implementations only need to call
+ * <code>next()</code> to step through the documents and must not
+ * close it.
+ *
+ * @param term the term.
+ * @param tDocs the term docs of <code>term</code>.
+ * @throws IOException if an error occurs while reading from the index.
+ */
+ void collect(Term term, TermDocs tDocs) throws IOException;
+ }
+ /** Per node record (document number, UUID, parent UUID) used while the parents cache is built. */
+ private static class NodeInfo {
+ /** Document number of the node, as returned by <code>TermDocs.doc()</code> for its UUID term. */
+ final int docId;
+ /** UUID of the node, parsed from the text of its UUID term. */
+ final UUID uuid;
+ /** UUID of the parent node; <code>null</code> until a PARENT term was read for this node. */
+ UUID parent;
+ /** Creates an info for the given document number and UUID, <code>parent</code> is left <code>null</code>. */
+ public NodeInfo(int docId, UUID uuid) {
+ this.docId = docId;
+ this.uuid = uuid;
+ }
+ }
+
+ /**
* Implements an empty TermDocs.
*/
static final TermDocs EMPTY = new TermDocs() {
Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java Thu Feb 26 13:18:19 2009
@@ -109,6 +109,16 @@
* @return a <code>DocId</code> based on a node UUID.
*/
static DocId create(String uuid) {
+ return new UUIDDocId(UUID.fromString(uuid));
+ }
+
+ /**
+ * Creates a <code>DocId</code> based on a node UUID.
+ * <p>
+ * The <code>uuid</code> is handed to <code>UUIDDocId</code> as is, no
+ * string parsing is involved.
+ *
+ * @param uuid the node uuid, must not be <code>null</code>.
+ * @return a <code>DocId</code> based on a node UUID.
+ */
+ static DocId create(UUID uuid) {
return new UUIDDocId(uuid);
}
@@ -188,13 +198,10 @@
* Creates a <code>DocId</code> based on a Node uuid.
*
* @param uuid the Node uuid.
- * @throws IllegalArgumentException if the <code>uuid</code> is
- * malformed.
*/
- UUIDDocId(String uuid) {
- UUID tmp = UUID.fromString(uuid);
- this.lsb = tmp.getLeastSignificantBits();
- this.msb = tmp.getMostSignificantBits();
+ UUIDDocId(UUID uuid) {
+ this.lsb = uuid.getLeastSignificantBits();
+ this.msb = uuid.getMostSignificantBits();
}
/**
Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java Thu Feb 26 13:18:19 2009
@@ -302,6 +302,12 @@
docCount += readers[i].numDocs();
}
log.info("merged " + docCount + " documents in " + time + " ms into " + index.getName() + ".");
+
+ // force initializing of caches
+ time = System.currentTimeMillis();
+ index.getReadOnlyIndexReader(true).close();
+ time = System.currentTimeMillis() - time;
+ log.debug("reader obtained in {} ms", new Long(time));
} finally {
for (int i = 0; i < readers.length; i++) {
try {