You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by cu...@apache.org on 2005/06/02 19:05:58 UTC

svn commit: r179611 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/IndexWriter.java

Author: cutting
Date: Thu Jun  2 10:05:58 2005
New Revision: 179611

URL: http://svn.apache.org/viewcvs?rev=179611&view=rev
Log:
Optimize IndexWriter.addIndexes(Directory[]).

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=179611&r1=179610&r2=179611&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jun  2 10:05:58 2005
@@ -113,7 +113,6 @@
     fields in arbitrarily formats can be cached as ints and floats.
     (Doug Cutting)
 
-
 API Changes
 
  1. Several methods and fields have been deprecated. The API documentation 
@@ -169,7 +168,6 @@
     corrupted when the old version of a file was longer than the new.
     Now any existing file is first removed.  (Doug Cutting)
 
-
 Optimizations
      
  1. Disk usage (peak requirements during indexing and optimization)
@@ -209,6 +207,11 @@
     random access is not required, e.g., when merging segments.  The
     term index is now read into memory lazily at the first
     random-access.  (Doug Cutting)
+
+ 9. Optimize IndexWriter.addIndexes(Directory[]) when the number of
+    added indexes is larger than mergeFactor.  Previously this could
+    result in quadratic performance.  Now performance is n log(n).
+    (Doug Cutting)
 
 Infrastructure
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=179611&r1=179610&r2=179611&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Thu Jun  2 10:05:58 2005
@@ -547,6 +547,9 @@
   public synchronized void addIndexes(Directory[] dirs)
       throws IOException {
     optimize();					  // start with zero or 1 seg
+
+    int start = segmentInfos.size();
+
     for (int i = 0; i < dirs.length; i++) {
       SegmentInfos sis = new SegmentInfos();	  // read infos from dir
       sis.read(dirs[i]);
@@ -554,6 +557,16 @@
         segmentInfos.addElement(sis.info(j));	  // add each info
       }
     }
+    
+    // merge newly added segments in log(n) passes
+    while (segmentInfos.size() > start+mergeFactor) {
+      for (int base = start+1; base < segmentInfos.size(); base++) {
+        int end = Math.min(segmentInfos.size(), base+mergeFactor);
+        if (end-base > 1)
+          mergeSegments(base, end);
+      }
+    }
+
     optimize();					  // final cleanup
   }
 
@@ -659,12 +672,19 @@
     and pushes the merged index onto the top of the segmentInfos stack. */
   private final void mergeSegments(int minSegment)
       throws IOException {
+    mergeSegments(minSegment, segmentInfos.size());
+  }
+
+  /** Merges the named range of segments, replacing them in the stack with a
+   * single segment. */
+  private final void mergeSegments(int minSegment, int end)
+    throws IOException {
     final String mergedName = newSegmentName();
     if (infoStream != null) infoStream.print("merging segments");
     SegmentMerger merger = new SegmentMerger(this, mergedName);
 
     final Vector segmentsToDelete = new Vector();
-    for (int i = minSegment; i < segmentInfos.size(); i++) {
+    for (int i = minSegment; i < end; i++) {
       SegmentInfo si = segmentInfos.info(i);
       if (infoStream != null)
         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
@@ -681,7 +701,8 @@
       infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
     }
 
-    segmentInfos.setSize(minSegment);          // pop old infos & add new
+    for (int i = end-1; i >= minSegment; i--)     // remove old infos & add new
+      segmentInfos.remove(i);
     segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
                                             directory));