You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2009/02/26 14:18:19 UTC
svn commit: r748135 - in /jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene: AbstractIndex.java CachingIndexReader.java DocId.java IndexMerger.java

Author: mreutegg
Date: Thu Feb 26 13:18:19 2009
New Revision: 748135

URL: http://svn.apache.org/viewvc?rev=748135&view=rev
Log:
JCR-1337: Optimize first execution queries for DescendantSelfAxisWeight/ChildAxisQuery
JCR-1884: CachingIndexReader.initializeParents() does not scale well with large indexes

Modified:
    jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
    jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
    jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java
    jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java

Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Thu Feb 26 13:18:19 2009
@@ -232,10 +232,12 @@
      * read-only, that is, any attempt to delete a document from the index
      * will throw an <code>UnsupportedOperationException</code>.
      *
+     * @param initCache if the caches in the index reader should be initialized
+     *          before the index reader is returned.
      * @return a read-only index reader.
      * @throws IOException if an error occurs while obtaining the index reader.
      */
-    synchronized ReadOnlyIndexReader getReadOnlyIndexReader()
+    synchronized ReadOnlyIndexReader getReadOnlyIndexReader(boolean initCache)
             throws IOException {
         // get current modifiable index reader
         CommittableIndexReader modifiableReader = getIndexReader();
@@ -271,7 +273,8 @@
         }
         if (sharedReader == null) {
             // create new shared reader
-            CachingIndexReader cr = new CachingIndexReader(IndexReader.open(getDirectory()), cache);
+            CachingIndexReader cr = new CachingIndexReader(
+                    IndexReader.open(getDirectory()), cache, initCache);
             sharedReader = new SharedIndexReader(cr);
         }
         readOnlyReader = new ReadOnlyIndexReader(sharedReader, deleted, modCount);
@@ -280,6 +283,20 @@
     }
 
     /**
+     * Returns a read-only index reader that can be used concurrently with
+     * other threads writing to this index; deleting a document through it
+     * throws an <code>UnsupportedOperationException</code>. Delegates to
+     * {@link #getReadOnlyIndexReader(boolean)} with <code>false</code>.
+     *
+     * @return a read-only index reader.
+     * @throws IOException if an error occurs while obtaining the index reader.
+     */
+    protected ReadOnlyIndexReader getReadOnlyIndexReader()
+            throws IOException {
+        return getReadOnlyIndexReader(false);
+    }
+
+    /**
      * Returns an <code>IndexWriter</code> on this index.
      * @return an <code>IndexWriter</code> on this index.
      * @throws IOException if the writer cannot be obtained.

Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Thu Feb 26 13:18:19 2009
@@ -22,11 +22,20 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
+import org.apache.jackrabbit.uuid.UUID;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.BitSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.text.NumberFormat;
+
+import EDU.oswego.cs.dl.util.concurrent.Executor;
+import EDU.oswego.cs.dl.util.concurrent.PooledExecutor;
+import EDU.oswego.cs.dl.util.concurrent.LinkedQueue;
 
 /**
  * Implements an <code>IndexReader</code> that maintains caches to resolve
@@ -41,6 +50,17 @@
     private static final Logger log = LoggerFactory.getLogger(CachingIndexReader.class);
 
     /**
+     * Executor shared by all instances; its single worker thread fills the
+     * {@link #parents} caches when background initialization is requested.
+     */
+    private static final Executor SERIAL_EXECUTOR = new PooledExecutor(
+            new LinkedQueue(), 1) {
+        {
+            setKeepAliveTime(500); // idle worker thread ends after 500 ms
+        }
+    };
+
+    /**
      * The current value of the global creation tick counter.
      */
     private static long currentTick;
@@ -53,6 +73,11 @@
     private final DocId[] parents;
 
     /**
+     * Initializes the {@link #parents} cache.
+     */
+    private CacheInitializer cacheInitializer;
+
+    /**
      * Tick when this index reader was created.
      */
     private final long creationTick = getNextCreationTick();
@@ -69,11 +94,26 @@
      * @param delegatee the base <code>IndexReader</code>.
      * @param cache     a document number cache, or <code>null</code> if not
      *                  available to this reader.
-     */
-    CachingIndexReader(IndexReader delegatee, DocNumberCache cache) {
+     * @param initCache if the {@link #parents} cache should be initialized
+     *                  when this index reader is constructed. Otherwise
+     *                  initialization happens in a background thread.
+     */
+    CachingIndexReader(IndexReader delegatee,
+                       DocNumberCache cache,
+                       boolean initCache) {
         super(delegatee);
         this.cache = cache;
         parents = new DocId[delegatee.maxDoc()];
+        this.cacheInitializer = new CacheInitializer(delegatee);
+        if (initCache) {
+            cacheInitializer.run();
+        } else {
+            try {
+                SERIAL_EXECUTOR.execute(cacheInitializer);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep the caller's interrupt status
+            }
+        }
     }
 
     /**
@@ -200,6 +240,14 @@
         return super.termDocs(term);
     }
 
+    protected void doClose() throws IOException {
+        try {
+            cacheInitializer.waitUntilStopped();
+        } catch (InterruptedException e) {
+            // stop was already requested; do not wait any longer, go on closing
+        }
+        super.doClose();
+    }
 
     //----------------------< internal >----------------------------------------
 
@@ -215,6 +263,216 @@
     }
 
     /**
+     * Initializes the {@link CachingIndexReader#parents} cache.
+     */
+    private class CacheInitializer implements Runnable {
+
+        /**
+         * From where to read.
+         */
+        private final IndexReader reader;
+
+        /**
+         * Set to <code>true</code> while {@link #run()} is executing.
+         */
+        private boolean running = false;
+
+        /**
+         * Set to <code>true</code> when this index reader is about to be closed.
+         */
+        private volatile boolean stopRequested = false;
+
+        /**
+         * Creates a new initializer with the given <code>reader</code>.
+         *
+         * @param reader an index reader.
+         */
+        public CacheInitializer(IndexReader reader) {
+            this.reader = reader;
+        }
+
+        /**
+         * Initializes the cache unless a stop was requested; wakes up waiters.
+         */
+        public void run() {
+            synchronized (this) {
+                running = true;
+            }
+            try {
+                if (stopRequested) {
+                    // immediately return when stop is requested
+                    return;
+                }
+                initializeParents(reader);
+            } catch (Exception e) {
+                // only log warn message during regular operation
+                if (!stopRequested) {
+                    log.warn("Error initializing parents cache.", e);
+                }
+            } finally {
+                synchronized (this) {
+                    running = false;
+                    notifyAll();
+                }
+            }
+        }
+
+        /**
+         * Requests this initializer to stop and waits while it is running.
+         *
+         * @throws InterruptedException if the current thread is interrupted.
+         */
+        public void waitUntilStopped() throws InterruptedException {
+            stopRequested = true;
+            synchronized (this) {
+                while (running) {
+                    wait();
+                }
+            }
+        }
+
+        /**
+         * Initializes the {@link CachingIndexReader#parents} <code>DocId</code>
+         * array.
+         *
+         * @param reader the underlying index reader.
+         * @throws IOException if an error occurs while reading from the index.
+         */
+        private void initializeParents(IndexReader reader) throws IOException {
+            long time = System.currentTimeMillis();
+            final Map docs = new HashMap();
+            // read UUIDs
+            collectTermDocs(reader, new Term(FieldNames.UUID, ""), new TermDocsCollector() {
+                public void collect(Term term, TermDocs tDocs) throws IOException {
+                    UUID uuid = UUID.fromString(term.text());
+                    if (tDocs.next()) {
+                        NodeInfo info = new NodeInfo(tDocs.doc(), uuid);
+                        docs.put(new Integer(info.docId), info);
+                    }
+                }
+            });
+
+            // read PARENTs and re-key the matching entries by node UUID
+            collectTermDocs(reader, new Term(FieldNames.PARENT, "0"), new TermDocsCollector() {
+                public void collect(Term term, TermDocs tDocs) throws IOException {
+                    UUID uuid = UUID.fromString(term.text()); // once per term
+                    while (tDocs.next()) {
+                        NodeInfo info = (NodeInfo) docs.remove(new Integer(tDocs.doc()));
+                        if (info != null) { // skip docs that have no UUID entry
+                            info.parent = uuid;
+                            docs.put(info.uuid, info);
+                        }
+                    }
+                }
+            });
+
+            if (stopRequested) {
+                return;
+            }
+
+            double foreignParents = 0;
+            Iterator it = docs.values().iterator();
+            while (it.hasNext()) {
+                NodeInfo info = (NodeInfo) it.next();
+                NodeInfo parent = (NodeInfo) docs.get(info.parent);
+                if (parent != null) {
+                    parents[info.docId] = DocId.create(parent.docId);
+                } else if (info.parent != null) {
+                    foreignParents++;
+                    parents[info.docId] = DocId.create(info.parent);
+                } else {
+                    // no parent -> root node
+                    parents[info.docId] = DocId.NULL;
+                }
+            }
+            if (log.isDebugEnabled()) {
+                NumberFormat nf = NumberFormat.getPercentInstance();
+                nf.setMaximumFractionDigits(1);
+                time = System.currentTimeMillis() - time;
+                if (parents.length > 0) {
+                    foreignParents /= parents.length;
+                }
+                log.debug("initialized {} DocIds in {} ms, {} foreign parents",
+                        new Object[]{
+                            new Integer(parents.length),
+                            new Long(time),
+                            nf.format(foreignParents)
+                        });
+            }
+        }
+
+        /**
+         * Collects term docs for a given start term. All terms with the same
+         * field as <code>start</code> are enumerated.
+         *
+         * @param reader the index reader.
+         * @param start the term where to start the term enumeration.
+         * @param collector collects the term docs for each term.
+         * @throws IOException if an error occurs while reading from the index.
+         */
+        private void collectTermDocs(IndexReader reader,
+                                     Term start,
+                                     TermDocsCollector collector)
+                throws IOException {
+            TermDocs tDocs = reader.termDocs();
+            try {
+                TermEnum terms = reader.terms(start);
+                try {
+                    int count = 0;
+                    do {
+                        Term t = terms.term();
+                        if (t != null && t.field() == start.field()) {
+                            tDocs.seek(terms);
+                            collector.collect(t, tDocs);
+                        } else {
+                            break;
+                        }
+                        // once in a while check if we should quit
+                        if (++count % 10000 == 0) {
+                            if (stopRequested) {
+                                break;
+                            }
+                        }
+                    } while (terms.next());
+                } finally {
+                    terms.close();
+                }
+            } finally {
+                tDocs.close();
+            }
+        }
+    }
+
+    /**
+     * Simple interface to collect a term and its term docs.
+     */
+    private interface TermDocsCollector {
+
+        /**
+         * Called for each term encountered during the term enumeration.
+         *
+         * @param term the term.
+         * @param tDocs the term docs, already positioned on <code>term</code>.
+         * @throws IOException if an error occurs while reading from the index.
+         */
+        void collect(Term term, TermDocs tDocs) throws IOException;
+    }
+
+    private static class NodeInfo {
+        /** Document number of the node in the index reader. */
+        final int docId;
+        /** UUID of the node. */
+        final UUID uuid;
+        /** UUID of the parent node, <code>null</code> until one is assigned. */
+        UUID parent;
+
+        public NodeInfo(int docId, UUID uuid) {
+            this.docId = docId;
+            this.uuid = uuid;
+        }
+    }
+
+    /**
      * Implements an empty TermDocs.
      */
     static final TermDocs EMPTY = new TermDocs() {

Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DocId.java Thu Feb 26 13:18:19 2009
@@ -109,6 +109,16 @@
      * @return a <code>DocId</code> based on a node UUID.
      */
     static DocId create(String uuid) {
+        return new UUIDDocId(UUID.fromString(uuid));
+    }
+
+    /**
+     * Creates a <code>DocId</code> based on an already parsed node UUID.
+     *
+     * @param uuid the node uuid, must not be <code>null</code>.
+     * @return a <code>DocId</code> based on a node UUID.
+     */
+    static DocId create(UUID uuid) {
         return new UUIDDocId(uuid);
     }
 
@@ -188,13 +198,10 @@
          * Creates a <code>DocId</code> based on a Node uuid.
          *
          * @param uuid the Node uuid.
-         * @throws IllegalArgumentException if the <code>uuid</code> is
-         *                                  malformed.
          */
-        UUIDDocId(String uuid) {
-            UUID tmp = UUID.fromString(uuid);
-            this.lsb = tmp.getLeastSignificantBits();
-            this.msb = tmp.getMostSignificantBits();
+        UUIDDocId(UUID uuid) {
+            this.lsb = uuid.getLeastSignificantBits();
+            this.msb = uuid.getMostSignificantBits();
         }
 
         /**

Modified: jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java?rev=748135&r1=748134&r2=748135&view=diff
==============================================================================
--- jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java (original)
+++ jackrabbit/branches/1.4/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMerger.java Thu Feb 26 13:18:19 2009
@@ -302,6 +302,12 @@
                             docCount += readers[i].numDocs();
                         }
                         log.info("merged " + docCount + " documents in " + time + " ms into " + index.getName() + ".");
+
+                        // force initializing of caches
+                        time = System.currentTimeMillis();
+                        index.getReadOnlyIndexReader(true).close();
+                        time = System.currentTimeMillis() - time;
+                        log.debug("reader obtained in {} ms", new Long(time));
                     } finally {
                         for (int i = 0; i < readers.length; i++) {
                             try {