You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2005/04/01 15:41:52 UTC

svn commit: r159687 - in incubator/jackrabbit/trunk: applications/test/ applications/test/workspaces/default/ applications/test/workspaces/test/ src/conf/ src/java/org/apache/jackrabbit/core/search/lucene/

Author: mreutegg
Date: Fri Apr  1 05:41:50 2005
New Revision: 159687

URL: http://svn.apache.org/viewcvs?view=rev&rev=159687
Log:
Improve indexing performance on consecutive small transactions on the same node.

Modified:
    incubator/jackrabbit/trunk/applications/test/repository.xml
    incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml
    incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml
    incubator/jackrabbit/trunk/src/conf/repository.xml
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java

Modified: incubator/jackrabbit/trunk/applications/test/repository.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/applications/test/repository.xml?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/applications/test/repository.xml (original)
+++ incubator/jackrabbit/trunk/applications/test/repository.xml Fri Apr  1 05:41:50 2005
@@ -187,6 +187,7 @@
             - maxMergeDocs: maximum number of nodes in segments that will be merged
             - mergeFactor: determines how often segment indices are merged
             - redoSize: maximum number of entries in the redo log until the in-memory index is merged
+            - bufferSize: maximum number of documents that are held in a pending queue until added to the index
         -->
         <SearchIndex class="org.apache.jackrabbit.core.search.lucene.SearchIndex">
             <param name="useCompoundFile" value="true"/>
@@ -194,6 +195,7 @@
             <param name="maxMergeDocs" value="10000"/>
             <param name="mergeFactor" value="10"/>
             <param name="redoSize" value="1000"/>
+            <param name="bufferSize" value="10"/>
 
             <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
                 <param name="path" value="${wsp.home}/index"/>

Modified: incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml (original)
+++ incubator/jackrabbit/trunk/applications/test/workspaces/default/workspace.xml Fri Apr  1 05:41:50 2005
@@ -24,6 +24,7 @@
     <param name="maxMergeDocs" value="10000" />
     <param name="mergeFactor" value="10" />
     <param name="redoSize" value="1000"/>
+    <param name="bufferSize" value="10"/>
     <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
       <param name="path" value="${wsp.home}/index" />
     </FileSystem>

Modified: incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml (original)
+++ incubator/jackrabbit/trunk/applications/test/workspaces/test/workspace.xml Fri Apr  1 05:41:50 2005
@@ -24,6 +24,7 @@
     <param name="maxMergeDocs" value="10000" />
     <param name="mergeFactor" value="10" />
     <param name="redoSize" value="1000"/>
+    <param name="bufferSize" value="10"/>
     <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
       <param name="path" value="${wsp.home}/index" />
     </FileSystem>

Modified: incubator/jackrabbit/trunk/src/conf/repository.xml
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/conf/repository.xml?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/src/conf/repository.xml (original)
+++ incubator/jackrabbit/trunk/src/conf/repository.xml Fri Apr  1 05:41:50 2005
@@ -187,6 +187,7 @@
             - maxMergeDocs: maximum number of nodes in segments that will be merged
             - mergeFactor: determines how often segment indices are merged
             - redoSize: maximum number of entries in the redo log until the in-memory index is merged
+            - bufferSize: maximum number of documents that are held in a pending queue until added to the index
         -->
         <SearchIndex class="org.apache.jackrabbit.core.search.lucene.SearchIndex">
             <param name="useCompoundFile" value="true"/>
@@ -194,6 +195,7 @@
             <param name="maxMergeDocs" value="10000"/>
             <param name="mergeFactor" value="10"/>
             <param name="redoSize" value="1000"/>
+            <param name="bufferSize" value="10"/>
 
             <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
                 <param name="path" value="${wsp.home}/index"/>

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/AbstractIndex.java Fri Apr  1 05:41:50 2005
@@ -21,7 +21,6 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.log4j.Logger;
 
@@ -94,16 +93,6 @@
      */
     Directory getDirectory() throws IOException {
         return directory;
-    }
-
-    /**
-     * Returns an <code>IndexSearcher</code> based on the <code>IndexReader</code>
-     * returned by {@link #getIndexReader()}.
-     * @return an <code>IndexSearcher</code> on this index.
-     * @throws IOException if an error occurs.
-     */
-    IndexSearcher getIndexSearcher() throws IOException {
-        return new IndexSearcher(getIndexReader());
     }
 
     /**

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/SearchIndex.java Fri Apr  1 05:41:50 2005
@@ -427,4 +427,8 @@
     public void setRedoSize(int size) {
         mergeSize = size;
     }
+
+    public void setBufferSize(int size) {
+        volatileIndex.setBufferSize(size);
+    }
 }

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java?view=diff&r1=159686&r2=159687
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/search/lucene/VolatileIndex.java Fri Apr  1 05:41:50 2005
@@ -20,18 +20,35 @@
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexReader;
 import org.apache.jackrabbit.core.fs.FileSystemException;
+import org.apache.commons.collections.SequencedHashMap;
 
 import java.io.IOException;
+import java.util.Map;
+import java.util.Iterator;
 
 /**
  * Implements an in-memory index with a redo log.
  */
 class VolatileIndex extends AbstractIndex {
 
+    /**
+     * Default value for {@link #bufferSize}.
+     */
+    private final int DEFAULT_BUFFER_SIZE = 10;
+
     /** The redo log */
     private final RedoLog redoLog;
 
+    /** Map of pending documents to add to the index */
+    private final Map pending = new SequencedHashMap();
+
+    /**
+     * Number of documents that are buffered before they are added to the index.
+     */
+    private int bufferSize = DEFAULT_BUFFER_SIZE;
+
     /**
      * Creates a new <code>VolatileIndex</code> using an <code>analyzer</code>
      * and a redo <code>log</code>.
@@ -54,7 +71,7 @@
 
     /**
      * Overwrites the default implementation by writing an entry to the
-     * redo log and then calling the <code>super.addDocument()</code> method.
+     * redo log and then adding the document to the pending list.
      * @param doc the document to add to the index.
      * @throws IOException if an error occurs while writing to the redo log
      * or the index.
@@ -66,12 +83,17 @@
         } catch (FileSystemException e) {
             throw new IOException(e.getMessage());
         }
-        super.addDocument(doc);
+        pending.put(doc.get(FieldNames.UUID), doc);
+        if (pending.size() >= bufferSize) {
+            commitPending();
+        }
     }
 
     /**
      * Overwrites the default implementation by writing an entry to the redo
-     * log and then calling the <code>super.removeDocument()</code> method.
+     * log and then removing the document from the pending list if it is
+     * there, or otherwise deleting it from the index via the index reader.
+     *
      * @param idTerm the uuid term of the document to remove.
      * @throws IOException if an error occurs while writing to the redo log
      * or the index.
@@ -84,6 +106,54 @@
         } catch (FileSystemException e) {
             throw new IOException(e.getMessage());
         }
-        return super.removeDocument(idTerm);
+        if (pending.remove(idTerm.text()) != null) {
+            // pending document has been removed
+            return 1;
+        } else {
+            // remove document from index
+            return super.getIndexReader().delete(idTerm);
+        }
+    }
+
+    /**
+     * Overrides the implementation in {@link AbstractIndex} to trigger
+     * commit of pending documents to index.
+     * @return the index reader for this index.
+     * @throws IOException if an error occurs building a reader.
+     */
+    protected synchronized IndexReader getIndexReader() throws IOException {
+        commitPending();
+        return super.getIndexReader();
+    }
+
+    /**
+     * Overrides the implementation in {@link AbstractIndex} to commit
+     * pending documents.
+     */
+    protected synchronized void commit() throws IOException {
+        commitPending();
+        super.commit();
+    }
+
+    /**
+     * Sets a new buffer size for pending documents to add to the index.
+     * Higher values consume more memory, but help to avoid multiple index
+     * cycles when a node is changed / saved multiple times.
+     *
+     * @param size the new buffer size.
+     */
+    void setBufferSize(int size) {
+        bufferSize = size;
+    }
+
+    /**
+     * Commits pending documents to the index.
+     */
+    private void commitPending() throws IOException {
+        for (Iterator it = pending.values().iterator(); it.hasNext();) {
+            Document doc = (Document) it.next();
+            super.addDocument(doc);
+            it.remove();
+        }
     }
 }