You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2005/04/22 11:59:57 UTC

svn commit: r164208 - in /incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene: AbstractIndex.java CachingIndexReader.java CachingMultiReader.java

Author: mreutegg
Date: Fri Apr 22 02:59:56 2005
New Revision: 164208

URL: http://svn.apache.org/viewcvs?rev=164208&view=rev
Log:
Improve performance for complex path queries involving descendant-or-self axis. e.g: //foo//bar//*[@prop='bla']

Modified:
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?rev=164208&r1=164207&r2=164208&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Fri Apr 22 02:59:56 2005
@@ -165,13 +165,18 @@
         }
 
         if (indexReader != null) {
-            indexReader.close();
-            log.debug("closing IndexReader.");
-            indexReader = null;
+            if (indexReader instanceof CachingIndexReader) {
+                // only commit changes, do not close
+                log.debug("committing IndexReader.");
+                ((CachingIndexReader) indexReader).commitDeleted();
+            } else {
+                indexReader.close();
+                indexReader = null;
+            }
         }
         if (indexWriter != null) {
+            log.debug("committing IndexWriter.");
             indexWriter.close();
-            log.debug("closing IndexWriter.");
             indexWriter = null;
         }
     }

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=164208&r1=164207&r2=164208&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Fri Apr 22 02:59:56 2005
@@ -22,22 +22,48 @@
 import org.apache.lucene.index.FilterIndexReader;
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.document.Document;
+import org.apache.log4j.Logger;
 
 import java.io.IOException;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Iterator;
 
 /**
- * Implements an <code>IndexReader</code> that caches document ids for the
- * {@link FieldNames.UUID} field.
+ * Implements an <code>IndexReader</code> that maintains caches to resolve
+ * {@link IndexReader#termDocs(Term)} calls efficiently.
+ * <p/>
+ * The caches are:
+ * <ul>
+ * <li>idCache: maps UUID to document number</li>
+ * <li>documentCache: maps document number to {@link Document} instance</li>
+ * <li>parentCache: maps parentUUID to List of document numbers</li>
+ * </ul>
  */
 class CachingIndexReader extends FilterIndexReader {
 
     /**
-     * The document id cache. Maps UUIDs to document number.
+     * The logger instance for this class.
      */
-    private Map cache;
+    private static final Logger log = Logger.getLogger(CachingIndexReader.class);
+
+    /**
+     * The document idCache. Maps UUIDs to document number.
+     */
+    private Map idCache;
+
+    /**
+     * The document cache. Maps document number to Document instance.
+     */
+    private Map documentCache;
+
+    /**
+     * The parent id cache. Maps parent UUID to List of document numbers.
+     */
+    private Map parentCache;
 
     /**
      * Creates a new <code>CachingIndexReader</code> based on
@@ -65,55 +91,21 @@
         if (term.field() == FieldNames.UUID) {
             synchronized (this) {
                 cacheInit();
-                final Integer docNo = (Integer) cache.get(term.text());
+                Integer docNo = (Integer) idCache.get(term.text());
                 if (docNo == null) {
                     return EMPTY;
                 } else {
-                    return new TermDocs() {
-
-                        private boolean consumed = false;
-
-                        private int doc = -1;
-
-                        public void seek(Term term) {
-                            throw new UnsupportedOperationException();
-                        }
-
-                        public void seek(TermEnum termEnum) {
-                            throw new UnsupportedOperationException();
-                        }
-
-                        public int doc() {
-                            return doc;
-                        }
-
-                        public int freq() {
-                            return 1;
-                        }
-
-                        public boolean next() {
-                            if (consumed) {
-                                return false;
-                            } else {
-                                doc = docNo.intValue();
-                                consumed = true;
-                                return true;
-                            }
-                        }
-
-                        public int read(int[] docs, int[] freqs) {
-                            docs[0] = docNo.intValue();
-                            freqs[0] = 1;
-                            return 1;
-                        }
-
-                        public boolean skipTo(int target) {
-                            throw new UnsupportedOperationException();
-                        }
-
-                        public void close() {
-                        }
-                    };
+                    return new CachingTermDocs(docNo);
+                }
+            }
+        } else if (term.field() == FieldNames.PARENT) {
+            synchronized (this) {
+                cacheInit();
+                List idList = (List) parentCache.get(term.text());
+                if (idList == null) {
+                    return EMPTY;
+                } else {
+                    return new CachingTermDocs(idList.iterator());
                 }
             }
         } else {
@@ -122,18 +114,58 @@
     }
 
     /**
-     * Removes the <code>TermEnum</code> from the cache and calls the base
+     * Returns the stored fields of the <code>n</code><sup>th</sup>
+     * <code>Document</code> in this index. This implementation returns cached
+     * versions of <code>Document</code> instance. Thus, the returned document
+     * must not be modified!
+     *
+     * @param n the document number.
+     * @return the <code>n</code><sup>th</sup> <code>Document</code> in this
+     *         index
+     * @throws IOException              if an error occurs while reading from
+     *                                  the index.
+     * @throws IllegalArgumentException if the document with number
+     *                                  <code>n</code> is deleted.
+     */
+    public Document document(int n) throws IOException {
+        if (isDeleted(n)) {
+            throw new IllegalArgumentException("attempt to access a deleted document");
+        }
+        synchronized (this) {
+            cacheInit();
+            return (Document) documentCache.get(new Integer(n));
+        }
+    }
+
+    /**
+     * Commits pending changes to disc.
+     * @throws IOException if an error occurs while writing changes.
+     */
+    public void commitDeleted() throws IOException {
+        commit();
+    }
+
+    /**
+     * Removes the <code>TermEnum</code> from the idCache and calls the base
      * <code>IndexReader</code>.
      * @param n the number of the document to delete.
      * @throws IOException if an error occurs while deleting the document.
      */
     protected synchronized void doDelete(int n) throws IOException {
-        if (cache != null) {
-            // todo keep a second map which contains the doc number to UUID mapping?
-            for (Iterator it = cache.values().iterator(); it.hasNext();) {
-                if (((Integer) it.next()).intValue() == n) {
-                    it.remove();
-                    break;
+        if (idCache != null) {
+            Document d = (Document) documentCache.remove(new Integer(n));
+            if (d != null) {
+                idCache.remove(d.get(FieldNames.UUID));
+                String parentUUID = d.get(FieldNames.PARENT);
+                List parents = (List) parentCache.get(parentUUID);
+                if (parents.size() == 1) {
+                    parentCache.remove(parentUUID);
+                } else {
+                    // replace existing list, other threads might use iterator
+                    // on existing list
+                    List repl = new ArrayList(parents);
+                    repl.remove(new Integer(n));
+                    parentCache.put(parentUUID, repl);
                 }
             }
         }
@@ -141,19 +173,35 @@
     }
 
     /**
-     * Initially fills the cache with all the UUID to document number mappings.
+     * Initially fills the caches: idCache, documentCache, parentCache.
      * @throws IOException if an error occurs while reading from the index.
      */
     private void cacheInit() throws IOException {
-        if (cache == null) {
-            Map tmp = new HashMap();
+        if (idCache == null) {
+            long time = System.currentTimeMillis();
+            Map ids = new HashMap(in.numDocs());
+            Map documents = new HashMap(in.numDocs());
+            Map parents = new HashMap(in.numDocs());
             for (int i = 0; i < in.maxDoc(); i++) {
                 if (!in.isDeleted(i)) {
                     Document d = in.document(i);
-                    tmp.put(d.get(FieldNames.UUID), new Integer(i));
+                    Integer docId = new Integer(i);
+                    ids.put(d.get(FieldNames.UUID), docId);
+                    documents.put(docId, d);
+                    String parentUUID = d.get(FieldNames.PARENT);
+                    List docIds = (List) parents.get(parentUUID);
+                    if (docIds == null) {
+                        docIds = new ArrayList();
+                        parents.put(parentUUID, docIds);
+                    }
+                    docIds.add(docId);
                 }
             }
-            cache = tmp;
+            idCache = ids;
+            documentCache = documents;
+            parentCache = parents;
+            time = System.currentTimeMillis() - time;
+            log.debug("IndexReader cache populated in: " + time + " ms.");
         }
     }
 
@@ -192,4 +240,98 @@
         }
     };
 
+    /**
+     * Implements a <code>TermDocs</code> that takes a list of document
+     * ids.
+     */
+    private static final class CachingTermDocs implements TermDocs {
+
+        /**
+         * The current document number.
+         */
+        private int current = -1;
+
+        /**
+         * Iterator over document numbers as <code>Integer</code> values.
+         */
+        private final Iterator docIds;
+
+        /**
+         * Creates a new <code>CachingTermDocs</code> instance with a single
+         * document id.
+         * @param docId the single document id.
+         */
+        CachingTermDocs(Integer docId) {
+            this(Arrays.asList(new Integer[]{docId}).iterator());
+        }
+
+        /**
+         * Creates a new <code>CachingTermDocs</code> instance that iterates
+         * over the <code>docIds</code>.
+         * @param docIds the actual document numbers / ids.
+         */
+        CachingTermDocs(Iterator docIds) {
+            this.docIds = docIds;
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public void seek(Term term) {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public void seek(TermEnum termEnum) {
+            throw new UnsupportedOperationException();
+        }
+
+
+        /**
+         * {@inheritDoc}
+         */
+        public int doc() {
+            return current;
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public int freq() {
+            return 1;
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public boolean next() {
+            boolean next = docIds.hasNext();
+            if (next) {
+                current = ((Integer) docIds.next()).intValue();
+            }
+            return next;
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public int read(int[] docs, int[] freqs) {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public boolean skipTo(int target) {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public void close() {
+        }
+    }
 }

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java?rev=164208&r1=164207&r2=164208&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/CachingMultiReader.java Fri Apr 22 02:59:56 2005
@@ -85,6 +85,12 @@
                     return new OffsetTermDocs(docs, starts[i]);
                 }
             }
+        } else if (term.field() == FieldNames.PARENT) {
+            TermDocs[] termDocs = new TermDocs[subReaders.length];
+            for (int i = 0; i < subReaders.length; i++) {
+                termDocs[i] = subReaders[i].termDocs(term);
+            }
+            return new MultiTermDocs(termDocs, starts);
         }
         return super.termDocs(term);
     }
@@ -144,7 +150,7 @@
         }
 
         /**
-         * @throws UnsupportedOperationException always
+         * {@inheritDoc}
          */
         public boolean next() throws IOException {
             return base.next();
@@ -169,6 +175,118 @@
          */
         public void close() throws IOException {
             base.close();
+        }
+    }
+
+    /**
+     * Implements a <code>TermDocs</code> which spans multiple other
+     * <code>TermDocs</code>.
+     */
+    private static final class MultiTermDocs implements TermDocs {
+
+        /**
+         * The actual <code>TermDocs</code>.
+         */
+        private final TermDocs[] termDocs;
+
+        /**
+         * The document number offsets for each <code>TermDocs</code>.
+         */
+        private final int[] starts;
+
+        /**
+         * The current <code>TermDocs</code> instance. If <code>null</code>
+         * there are no more documents.
+         */
+        private TermDocs current;
+
+        /**
+         * The current index into {@link #termDocs} and {@link #starts}.
+         */
+        private int idx = 0;
+
+        /**
+         * Creates a new <code>MultiTermDocs</code> instance.
+         * @param termDocs the actual <code>TermDocs</code>.
+         * @param starts the document number offsets for each
+         *  <code>TermDocs</code>
+         */
+        MultiTermDocs(TermDocs[] termDocs, int[] starts) {
+            this.termDocs = termDocs;
+            this.starts = starts;
+            current = termDocs[idx];
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public void seek(Term term) {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public void seek(TermEnum termEnum) {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public int doc() {
+            return starts[idx] + current.doc();
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public int freq() {
+            return current.freq();
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public boolean next() throws IOException {
+            while (current != null && !current.next()) {
+                if (++idx >= termDocs.length) {
+                    // no more TermDocs
+                    current = null;
+                } else {
+                    // move to next TermDocs
+                    current = termDocs[idx];
+                }
+            }
+            return current != null;
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public int read(int[] docs, int[] freqs) throws IOException {
+            int count = 0;
+            for (int i = 0; i < docs.length && next(); i++, count++) {
+                docs[i] = doc();
+                freqs[i] = freq();
+            }
+            return count;
+        }
+
+        /**
+         * @throws UnsupportedOperationException always
+         */
+        public boolean skipTo(int target) {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        public void close() throws IOException {
+            for (int i = 0; i < termDocs.length; i++) {
+                termDocs[i].close();
+            }
         }
     }
 }