You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2005/04/22 11:59:57 UTC
svn commit: r164208 - in
/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene:
AbstractIndex.java CachingIndexReader.java CachingMultiReader.java
Author: mreutegg
Date: Fri Apr 22 02:59:56 2005
New Revision: 164208
URL: http://svn.apache.org/viewcvs?rev=164208&view=rev
Log:
Improve performance for complex path queries involving descendant-or-self axis. e.g: //foo//bar//*[@prop='bla']
Modified:
incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java
Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?rev=164208&r1=164207&r2=164208&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Fri Apr 22 02:59:56 2005
@@ -165,13 +165,18 @@
}
if (indexReader != null) {
- indexReader.close();
- log.debug("closing IndexReader.");
- indexReader = null;
+ if (indexReader instanceof CachingIndexReader) {
+ // only commit changes, do not close
+ log.debug("committing IndexReader.");
+ ((CachingIndexReader) indexReader).commitDeleted();
+ } else {
+ indexReader.close();
+ indexReader = null;
+ }
}
if (indexWriter != null) {
+ log.debug("committing IndexWriter.");
indexWriter.close();
- log.debug("closing IndexWriter.");
indexWriter = null;
}
}
Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=164208&r1=164207&r2=164208&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Fri Apr 22 02:59:56 2005
@@ -22,22 +22,48 @@
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.document.Document;
+import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Iterator;
/**
- * Implements an <code>IndexReader</code> that caches document ids for the
- * {@link FieldNames.UUID} field.
+ * Implements an <code>IndexReader</code> that maintains caches to resolve
+ * {@link IndexReader#termDocs(Term)} calls efficiently.
+ * <p/>
+ * The caches are:
+ * <ul>
+ * <li>idCache: maps UUID to document number</li>
+ * <li>documentCache: maps document number to {@link Document} instance</li>
+ * <li>parentCache: maps parentUUID to List of document numbers</li>
+ * </ul>
*/
class CachingIndexReader extends FilterIndexReader {
/**
- * The document id cache. Maps UUIDs to document number.
+ * The logger instance for this class.
*/
- private Map cache;
+ private static final Logger log = Logger.getLogger(CachingIndexReader.class);
+
+ /**
+ * The document idCache. Maps UUIDs to document number.
+ */
+ private Map idCache;
+
+ /**
+ * The document cache. Maps document number to Document instance.
+ */
+ private Map documentCache;
+
+ /**
+ * The parent id cache. Maps parent UUID to List of document numbers.
+ */
+ private Map parentCache;
/**
* Creates a new <code>CachingIndexReader</code> based on
@@ -65,55 +91,21 @@
if (term.field() == FieldNames.UUID) {
synchronized (this) {
cacheInit();
- final Integer docNo = (Integer) cache.get(term.text());
+ Integer docNo = (Integer) idCache.get(term.text());
if (docNo == null) {
return EMPTY;
} else {
- return new TermDocs() {
-
- private boolean consumed = false;
-
- private int doc = -1;
-
- public void seek(Term term) {
- throw new UnsupportedOperationException();
- }
-
- public void seek(TermEnum termEnum) {
- throw new UnsupportedOperationException();
- }
-
- public int doc() {
- return doc;
- }
-
- public int freq() {
- return 1;
- }
-
- public boolean next() {
- if (consumed) {
- return false;
- } else {
- doc = docNo.intValue();
- consumed = true;
- return true;
- }
- }
-
- public int read(int[] docs, int[] freqs) {
- docs[0] = docNo.intValue();
- freqs[0] = 1;
- return 1;
- }
-
- public boolean skipTo(int target) {
- throw new UnsupportedOperationException();
- }
-
- public void close() {
- }
- };
+ return new CachingTermDocs(docNo);
+ }
+ }
+ } else if (term.field() == FieldNames.PARENT) {
+ synchronized (this) {
+ cacheInit();
+ List idList = (List) parentCache.get(term.text());
+ if (idList == null) {
+ return EMPTY;
+ } else {
+ return new CachingTermDocs(idList.iterator());
}
}
} else {
@@ -122,18 +114,58 @@
}
/**
- * Removes the <code>TermEnum</code> from the cache and calls the base
+ * Returns the stored fields of the <code>n</code><sup>th</sup>
+ * <code>Document</code> in this index. This implementation returns cached
+ * versions of <code>Document</code> instance. Thus, the returned document
+ * must not be modified!
+ *
+ * @param n the document number.
+ * @return the <code>n</code><sup>th</sup> <code>Document</code> in this
+ * index
+ * @throws IOException if an error occurs while reading from
+ * the index.
+ * @throws IllegalArgumentException if the document with number
+ * <code>n</code> is deleted.
+ */
+ public Document document(int n) throws IOException {
+ if (isDeleted(n)) {
+ throw new IllegalArgumentException("attempt to access a deleted document");
+ }
+ synchronized (this) {
+ cacheInit();
+ return (Document) documentCache.get(new Integer(n));
+ }
+ }
+
+ /**
+ * Commits pending changes to disc.
+ * @throws IOException if an error occurs while writing changes.
+ */
+ public void commitDeleted() throws IOException {
+ commit();
+ }
+
+ /**
+ * Removes the <code>TermEnum</code> from the idCache and calls the base
* <code>IndexReader</code>.
* @param n the number of the document to delete.
* @throws IOException if an error occurs while deleting the document.
*/
protected synchronized void doDelete(int n) throws IOException {
- if (cache != null) {
- // todo keep a second map which contains the doc number to UUID mapping?
- for (Iterator it = cache.values().iterator(); it.hasNext();) {
- if (((Integer) it.next()).intValue() == n) {
- it.remove();
- break;
+ if (idCache != null) {
+ Document d = (Document) documentCache.remove(new Integer(n));
+ if (d != null) {
+ idCache.remove(d.get(FieldNames.UUID));
+ String parentUUID = d.get(FieldNames.PARENT);
+ List parents = (List) parentCache.get(parentUUID);
+ if (parents.size() == 1) {
+ parentCache.remove(parentUUID);
+ } else {
+ // replace existing list, other threads might use iterator
+ // on existing list
+ List repl = new ArrayList(parents);
+ repl.remove(new Integer(n));
+ parentCache.put(parentUUID, repl);
}
}
}
@@ -141,19 +173,35 @@
}
/**
- * Initially fills the cache with all the UUID to document number mappings.
+ * Initially fills the caches: idCache, documentCache, parentCache.
* @throws IOException if an error occurs while reading from the index.
*/
private void cacheInit() throws IOException {
- if (cache == null) {
- Map tmp = new HashMap();
+ if (idCache == null) {
+ long time = System.currentTimeMillis();
+ Map ids = new HashMap(in.numDocs());
+ Map documents = new HashMap(in.numDocs());
+ Map parents = new HashMap(in.numDocs());
for (int i = 0; i < in.maxDoc(); i++) {
if (!in.isDeleted(i)) {
Document d = in.document(i);
- tmp.put(d.get(FieldNames.UUID), new Integer(i));
+ Integer docId = new Integer(i);
+ ids.put(d.get(FieldNames.UUID), docId);
+ documents.put(docId, d);
+ String parentUUID = d.get(FieldNames.PARENT);
+ List docIds = (List) parents.get(parentUUID);
+ if (docIds == null) {
+ docIds = new ArrayList();
+ parents.put(parentUUID, docIds);
+ }
+ docIds.add(docId);
}
}
- cache = tmp;
+ idCache = ids;
+ documentCache = documents;
+ parentCache = parents;
+ time = System.currentTimeMillis() - time;
+ log.debug("IndexReader cache populated in: " + time + " ms.");
}
}
@@ -192,4 +240,98 @@
}
};
+ /**
+ * Implements a <code>TermDocs</code> that takes a list of document
+ * ids.
+ */
+ private static final class CachingTermDocs implements TermDocs {
+
+ /**
+ * The current document number.
+ */
+ private int current = -1;
+
+ /**
+ * Iterator over document numbers as <code>Integer</code> values.
+ */
+ private final Iterator docIds;
+
+ /**
+ * Creates a new <code>CachingTermDocs</code> instance with a single
+ * document id.
+ * @param docId the single document id.
+ */
+ CachingTermDocs(Integer docId) {
+ this(Arrays.asList(new Integer[]{docId}).iterator());
+ }
+
+ /**
+ * Creates a new <code>CachingTermDocs</code> instance that iterates
+ * over the <code>docIds</code>.
+ * @param docIds the actual document numbers / ids.
+ */
+ CachingTermDocs(Iterator docIds) {
+ this.docIds = docIds;
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public void seek(Term term) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public void seek(TermEnum termEnum) {
+ throw new UnsupportedOperationException();
+ }
+
+
+ /**
+ * {@inheritDoc}
+ */
+ public int doc() {
+ return current;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int freq() {
+ return 1;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public boolean next() {
+ boolean next = docIds.hasNext();
+ if (next) {
+ current = ((Integer) docIds.next()).intValue();
+ }
+ return next;
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public int read(int[] docs, int[] freqs) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public boolean skipTo(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public void close() {
+ }
+ }
}
Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java?rev=164208&r1=164207&r2=164208&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java Fri Apr 22 02:59:56 2005
@@ -85,6 +85,12 @@
return new OffsetTermDocs(docs, starts[i]);
}
}
+ } else if (term.field() == FieldNames.PARENT) {
+ TermDocs[] termDocs = new TermDocs[subReaders.length];
+ for (int i = 0; i < subReaders.length; i++) {
+ termDocs[i] = subReaders[i].termDocs(term);
+ }
+ return new MultiTermDocs(termDocs, starts);
}
return super.termDocs(term);
}
@@ -144,7 +150,7 @@
}
/**
- * @throws UnsupportedOperationException always
+ * {@inheritDoc}
*/
public boolean next() throws IOException {
return base.next();
@@ -169,6 +175,118 @@
*/
public void close() throws IOException {
base.close();
+ }
+ }
+
+ /**
+ * Implements a <code>TermDocs</code> which spans multiple other
+ * <code>TermDocs</code>.
+ */
+ private static final class MultiTermDocs implements TermDocs {
+
+ /**
+ * The actual <code>TermDocs</code>.
+ */
+ private final TermDocs[] termDocs;
+
+ /**
+ * The document number offsets for each <code>TermDocs</code>.
+ */
+ private final int[] starts;
+
+ /**
+ * The current <code>TermDocs</code> instance. If <code>null</code>
+ * there are no more documents.
+ */
+ private TermDocs current;
+
+ /**
+ * The current index into {@link #termDocs} and {@link #starts}.
+ */
+ private int idx = 0;
+
+ /**
+ * Creates a new <code>MultiTermDocs</code> instance.
+ * @param termDocs the actual <code>TermDocs</code>.
+ * @param starts the document number offsets for each
+ * <code>TermDocs</code>
+ */
+ MultiTermDocs(TermDocs[] termDocs, int[] starts) {
+ this.termDocs = termDocs;
+ this.starts = starts;
+ current = termDocs[idx];
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public void seek(Term term) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public void seek(TermEnum termEnum) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int doc() {
+ return starts[idx] + current.doc();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int freq() {
+ return current.freq();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public boolean next() throws IOException {
+ while (current != null && !current.next()) {
+ if (++idx >= termDocs.length) {
+ // no more TermDocs
+ current = null;
+ } else {
+ // move to next TermDocs
+ current = termDocs[idx];
+ }
+ }
+ return current != null;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int read(int[] docs, int[] freqs) throws IOException {
+ int count = 0;
+ for (int i = 0; i < docs.length && next(); i++, count++) {
+ docs[i] = doc();
+ freqs[i] = freq();
+ }
+ return count;
+ }
+
+ /**
+ * @throws UnsupportedOperationException always
+ */
+ public boolean skipTo(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public void close() throws IOException {
+ for (int i = 0; i < termDocs.length; i++) {
+ termDocs[i].close();
+ }
}
}
}