You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by cu...@apache.org on 2005/06/02 19:05:58 UTC
svn commit: r179611 - in /lucene/java/trunk: CHANGES.txt
src/java/org/apache/lucene/index/IndexWriter.java
Author: cutting
Date: Thu Jun 2 10:05:58 2005
New Revision: 179611
URL: http://svn.apache.org/viewcvs?rev=179611&view=rev
Log:
Optimize IndexWriter.addIndexes(Directory[]).
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=179611&r1=179610&r2=179611&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jun 2 10:05:58 2005
@@ -113,7 +113,6 @@
fields in arbitrary formats can be cached as ints and floats.
(Doug Cutting)
-
API Changes
1. Several methods and fields have been deprecated. The API documentation
@@ -169,7 +168,6 @@
corrupted when the old version of a file was longer than the new.
Now any existing file is first removed. (Doug Cutting)
-
Optimizations
1. Disk usage (peak requirements during indexing and optimization)
@@ -209,6 +207,11 @@
random access is not required, e.g., when merging segments. The
term index is now read into memory lazily at the first
random-access. (Doug Cutting)
+
+ 9. Optimize IndexWriter.addIndexes(Directory[]) when the number of
+ added indexes is larger than mergeFactor. Previously this could
+ result in quadratic performance. Now performance is n log(n).
+ (Doug Cutting)
Infrastructure
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=179611&r1=179610&r2=179611&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Thu Jun 2 10:05:58 2005
@@ -547,6 +547,9 @@
public synchronized void addIndexes(Directory[] dirs)
throws IOException {
optimize(); // start with zero or 1 seg
+
+ int start = segmentInfos.size();
+
for (int i = 0; i < dirs.length; i++) {
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dirs[i]);
@@ -554,6 +557,16 @@
segmentInfos.addElement(sis.info(j)); // add each info
}
}
+
+ // merge newly added segments in log(n) passes
+ while (segmentInfos.size() > start+mergeFactor) {
+ for (int base = start+1; base < segmentInfos.size(); base++) {
+ int end = Math.min(segmentInfos.size(), base+mergeFactor);
+ if (end-base > 1)
+ mergeSegments(base, end);
+ }
+ }
+
optimize(); // final cleanup
}
@@ -659,12 +672,19 @@
and pushes the merged index onto the top of the segmentInfos stack. */
private final void mergeSegments(int minSegment)
throws IOException {
+ mergeSegments(minSegment, segmentInfos.size());
+ }
+
+ /** Merges the named range of segments, replacing them in the stack with a
+ * single segment. */
+ private final void mergeSegments(int minSegment, int end)
+ throws IOException {
final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments");
SegmentMerger merger = new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
- for (int i = minSegment; i < segmentInfos.size(); i++) {
+ for (int i = minSegment; i < end; i++) {
SegmentInfo si = segmentInfos.info(i);
if (infoStream != null)
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
@@ -681,7 +701,8 @@
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
}
- segmentInfos.setSize(minSegment); // pop old infos & add new
+ for (int i = end-1; i >= minSegment; i--) // remove old infos & add new
+ segmentInfos.remove(i);
segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
directory));